2 * Copyright 2012 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
22 * Authors: Alex Deucher
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
29 #include "radeon_asic.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
/*
 * Firmware images this driver may request at runtime, one set per CIK
 * ASIC family: Bonaire and Hawaii (discrete, include MC and SMC images)
 * and Kaveri/Kabini (APUs, no MC/SMC images).  The suffixes name the
 * engine each image feeds: pfp/me/ce/mec (graphics and compute
 * micro-engines), mc (memory controller), rlc (run-list controller),
 * sdma (DMA engine), smc (system management controller).
 * NOTE(review): this chunk is a mangled extraction — original line
 * numbers are fused into each line below.
 */
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
/*
 * Helpers shared with older-generation code paths (r600/evergreen/sumo/si)
 * that CIK reuses.  NOTE(review): these externs would conventionally live
 * in a header; kept here to match the file's existing style.
 */
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
/* Forward declarations for file-local (static) CIK helpers defined later. */
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
80 static void cik_program_aspm(struct radeon_device *rdev);
81 static void cik_init_pg(struct radeon_device *rdev);
82 static void cik_init_cg(struct radeon_device *rdev);
83 static void cik_fini_pg(struct radeon_device *rdev);
84 static void cik_fini_cg(struct radeon_device *rdev);
85 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
/*
 * ci_get_temp() - read the current GPU temperature on CI discrete parts.
 * Reads CG_MULT_THERMAL_STATUS through the SMC indirect register space,
 * extracts the CTF temperature field, and scales degrees C to
 * millidegrees (* 1000).
 * NOTE(review): interior lines (braces, local declarations, the shift
 * amount and the return statement) are elided in this extraction.
 */
88 /* get temperature in millidegrees */
89 int ci_get_temp(struct radeon_device *rdev)
94 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
100 actual_temp = temp & 0x1ff;	/* 9-bit temperature field */
102 actual_temp = actual_temp * 1000;	/* degrees C -> millidegrees */
/*
 * kv_get_temp() - read the current GPU temperature on KV/KB APUs.
 * Reads a raw SMC thermal register (0xC0300E0C), converts the raw value
 * to degrees C via (raw / 8) - 49, then scales to millidegrees.
 * NOTE(review): interior lines (braces, declarations, return) are elided
 * in this extraction.
 */
107 /* get temperature in millidegrees */
108 int kv_get_temp(struct radeon_device *rdev)
113 temp = RREG32_SMC(0xC0300E0C);
116 actual_temp = (temp / 8) - 49;	/* raw units -> degrees C */
120 actual_temp = actual_temp * 1000;	/* degrees C -> millidegrees */
/*
 * cik_pciep_rreg() - indirect PCIE port register read.
 * Serialized by rdev->pciep_idx_lock (irqsave, so it is usable from any
 * context): writes the register offset to PCIE_INDEX, reads PCIE_INDEX
 * back to post the write, then returns the value read from PCIE_DATA.
 * NOTE(review): braces, local declarations and the return statement are
 * elided in this extraction.
 */
126 * Indirect registers accessor
128 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
133 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
134 WREG32(PCIE_INDEX, reg);
135 (void)RREG32(PCIE_INDEX);	/* readback posts the index write */
136 r = RREG32(PCIE_DATA);
137 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/*
 * cik_pciep_wreg() - indirect PCIE port register write.
 * Mirror of cik_pciep_rreg(): under rdev->pciep_idx_lock, selects the
 * register via PCIE_INDEX (posted with a readback), writes the value to
 * PCIE_DATA, and reads PCIE_DATA back to post the data write.
 */
141 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
145 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
146 WREG32(PCIE_INDEX, reg);
147 (void)RREG32(PCIE_INDEX);	/* readback posts the index write */
148 WREG32(PCIE_DATA, v);
149 (void)RREG32(PCIE_DATA);	/* readback posts the data write */
150 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).  Each entry
 * packs an instance/broadcast select in the high 16 bits and a dword
 * register offset (byte offset >> 2) in the low 16 bits.
 * NOTE(review): table values come from AMD; the array's opening and
 * closing braces are elided in this extraction — do not edit entries.
 */
153 static const u32 spectre_rlc_save_restore_register_list[] =
155 (0x0e00 << 16) | (0xc12c >> 2),
157 (0x0e00 << 16) | (0xc140 >> 2),
159 (0x0e00 << 16) | (0xc150 >> 2),
161 (0x0e00 << 16) | (0xc15c >> 2),
163 (0x0e00 << 16) | (0xc168 >> 2),
165 (0x0e00 << 16) | (0xc170 >> 2),
167 (0x0e00 << 16) | (0xc178 >> 2),
169 (0x0e00 << 16) | (0xc204 >> 2),
171 (0x0e00 << 16) | (0xc2b4 >> 2),
173 (0x0e00 << 16) | (0xc2b8 >> 2),
175 (0x0e00 << 16) | (0xc2bc >> 2),
177 (0x0e00 << 16) | (0xc2c0 >> 2),
179 (0x0e00 << 16) | (0x8228 >> 2),
181 (0x0e00 << 16) | (0x829c >> 2),
183 (0x0e00 << 16) | (0x869c >> 2),
185 (0x0600 << 16) | (0x98f4 >> 2),
187 (0x0e00 << 16) | (0x98f8 >> 2),
189 (0x0e00 << 16) | (0x9900 >> 2),
191 (0x0e00 << 16) | (0xc260 >> 2),
193 (0x0e00 << 16) | (0x90e8 >> 2),
195 (0x0e00 << 16) | (0x3c000 >> 2),
197 (0x0e00 << 16) | (0x3c00c >> 2),
199 (0x0e00 << 16) | (0x8c1c >> 2),
201 (0x0e00 << 16) | (0x9700 >> 2),
203 (0x0e00 << 16) | (0xcd20 >> 2),
205 (0x4e00 << 16) | (0xcd20 >> 2),
207 (0x5e00 << 16) | (0xcd20 >> 2),
209 (0x6e00 << 16) | (0xcd20 >> 2),
211 (0x7e00 << 16) | (0xcd20 >> 2),
213 (0x8e00 << 16) | (0xcd20 >> 2),
215 (0x9e00 << 16) | (0xcd20 >> 2),
217 (0xae00 << 16) | (0xcd20 >> 2),
219 (0xbe00 << 16) | (0xcd20 >> 2),
221 (0x0e00 << 16) | (0x89bc >> 2),
223 (0x0e00 << 16) | (0x8900 >> 2),
226 (0x0e00 << 16) | (0xc130 >> 2),
228 (0x0e00 << 16) | (0xc134 >> 2),
230 (0x0e00 << 16) | (0xc1fc >> 2),
232 (0x0e00 << 16) | (0xc208 >> 2),
234 (0x0e00 << 16) | (0xc264 >> 2),
236 (0x0e00 << 16) | (0xc268 >> 2),
238 (0x0e00 << 16) | (0xc26c >> 2),
240 (0x0e00 << 16) | (0xc270 >> 2),
242 (0x0e00 << 16) | (0xc274 >> 2),
244 (0x0e00 << 16) | (0xc278 >> 2),
246 (0x0e00 << 16) | (0xc27c >> 2),
248 (0x0e00 << 16) | (0xc280 >> 2),
250 (0x0e00 << 16) | (0xc284 >> 2),
252 (0x0e00 << 16) | (0xc288 >> 2),
254 (0x0e00 << 16) | (0xc28c >> 2),
256 (0x0e00 << 16) | (0xc290 >> 2),
258 (0x0e00 << 16) | (0xc294 >> 2),
260 (0x0e00 << 16) | (0xc298 >> 2),
262 (0x0e00 << 16) | (0xc29c >> 2),
264 (0x0e00 << 16) | (0xc2a0 >> 2),
266 (0x0e00 << 16) | (0xc2a4 >> 2),
268 (0x0e00 << 16) | (0xc2a8 >> 2),
270 (0x0e00 << 16) | (0xc2ac >> 2),
272 (0x0e00 << 16) | (0xc2b0 >> 2),
274 (0x0e00 << 16) | (0x301d0 >> 2),
276 (0x0e00 << 16) | (0x30238 >> 2),
278 (0x0e00 << 16) | (0x30250 >> 2),
280 (0x0e00 << 16) | (0x30254 >> 2),
282 (0x0e00 << 16) | (0x30258 >> 2),
284 (0x0e00 << 16) | (0x3025c >> 2),
286 (0x4e00 << 16) | (0xc900 >> 2),
288 (0x5e00 << 16) | (0xc900 >> 2),
290 (0x6e00 << 16) | (0xc900 >> 2),
292 (0x7e00 << 16) | (0xc900 >> 2),
294 (0x8e00 << 16) | (0xc900 >> 2),
296 (0x9e00 << 16) | (0xc900 >> 2),
298 (0xae00 << 16) | (0xc900 >> 2),
300 (0xbe00 << 16) | (0xc900 >> 2),
302 (0x4e00 << 16) | (0xc904 >> 2),
304 (0x5e00 << 16) | (0xc904 >> 2),
306 (0x6e00 << 16) | (0xc904 >> 2),
308 (0x7e00 << 16) | (0xc904 >> 2),
310 (0x8e00 << 16) | (0xc904 >> 2),
312 (0x9e00 << 16) | (0xc904 >> 2),
314 (0xae00 << 16) | (0xc904 >> 2),
316 (0xbe00 << 16) | (0xc904 >> 2),
318 (0x4e00 << 16) | (0xc908 >> 2),
320 (0x5e00 << 16) | (0xc908 >> 2),
322 (0x6e00 << 16) | (0xc908 >> 2),
324 (0x7e00 << 16) | (0xc908 >> 2),
326 (0x8e00 << 16) | (0xc908 >> 2),
328 (0x9e00 << 16) | (0xc908 >> 2),
330 (0xae00 << 16) | (0xc908 >> 2),
332 (0xbe00 << 16) | (0xc908 >> 2),
334 (0x4e00 << 16) | (0xc90c >> 2),
336 (0x5e00 << 16) | (0xc90c >> 2),
338 (0x6e00 << 16) | (0xc90c >> 2),
340 (0x7e00 << 16) | (0xc90c >> 2),
342 (0x8e00 << 16) | (0xc90c >> 2),
344 (0x9e00 << 16) | (0xc90c >> 2),
346 (0xae00 << 16) | (0xc90c >> 2),
348 (0xbe00 << 16) | (0xc90c >> 2),
350 (0x4e00 << 16) | (0xc910 >> 2),
352 (0x5e00 << 16) | (0xc910 >> 2),
354 (0x6e00 << 16) | (0xc910 >> 2),
356 (0x7e00 << 16) | (0xc910 >> 2),
358 (0x8e00 << 16) | (0xc910 >> 2),
360 (0x9e00 << 16) | (0xc910 >> 2),
362 (0xae00 << 16) | (0xc910 >> 2),
364 (0xbe00 << 16) | (0xc910 >> 2),
366 (0x0e00 << 16) | (0xc99c >> 2),
368 (0x0e00 << 16) | (0x9834 >> 2),
370 (0x0000 << 16) | (0x30f00 >> 2),
372 (0x0001 << 16) | (0x30f00 >> 2),
374 (0x0000 << 16) | (0x30f04 >> 2),
376 (0x0001 << 16) | (0x30f04 >> 2),
378 (0x0000 << 16) | (0x30f08 >> 2),
380 (0x0001 << 16) | (0x30f08 >> 2),
382 (0x0000 << 16) | (0x30f0c >> 2),
384 (0x0001 << 16) | (0x30f0c >> 2),
386 (0x0600 << 16) | (0x9b7c >> 2),
388 (0x0e00 << 16) | (0x8a14 >> 2),
390 (0x0e00 << 16) | (0x8a18 >> 2),
392 (0x0600 << 16) | (0x30a00 >> 2),
394 (0x0e00 << 16) | (0x8bf0 >> 2),
396 (0x0e00 << 16) | (0x8bcc >> 2),
398 (0x0e00 << 16) | (0x8b24 >> 2),
400 (0x0e00 << 16) | (0x30a04 >> 2),
402 (0x0600 << 16) | (0x30a10 >> 2),
404 (0x0600 << 16) | (0x30a14 >> 2),
406 (0x0600 << 16) | (0x30a18 >> 2),
408 (0x0600 << 16) | (0x30a2c >> 2),
410 (0x0e00 << 16) | (0xc700 >> 2),
412 (0x0e00 << 16) | (0xc704 >> 2),
414 (0x0e00 << 16) | (0xc708 >> 2),
416 (0x0e00 << 16) | (0xc768 >> 2),
418 (0x0400 << 16) | (0xc770 >> 2),
420 (0x0400 << 16) | (0xc774 >> 2),
422 (0x0400 << 16) | (0xc778 >> 2),
424 (0x0400 << 16) | (0xc77c >> 2),
426 (0x0400 << 16) | (0xc780 >> 2),
428 (0x0400 << 16) | (0xc784 >> 2),
430 (0x0400 << 16) | (0xc788 >> 2),
432 (0x0400 << 16) | (0xc78c >> 2),
434 (0x0400 << 16) | (0xc798 >> 2),
436 (0x0400 << 16) | (0xc79c >> 2),
438 (0x0400 << 16) | (0xc7a0 >> 2),
440 (0x0400 << 16) | (0xc7a4 >> 2),
442 (0x0400 << 16) | (0xc7a8 >> 2),
444 (0x0400 << 16) | (0xc7ac >> 2),
446 (0x0400 << 16) | (0xc7b0 >> 2),
448 (0x0400 << 16) | (0xc7b4 >> 2),
450 (0x0e00 << 16) | (0x9100 >> 2),
452 (0x0e00 << 16) | (0x3c010 >> 2),
454 (0x0e00 << 16) | (0x92a8 >> 2),
456 (0x0e00 << 16) | (0x92ac >> 2),
458 (0x0e00 << 16) | (0x92b4 >> 2),
460 (0x0e00 << 16) | (0x92b8 >> 2),
462 (0x0e00 << 16) | (0x92bc >> 2),
464 (0x0e00 << 16) | (0x92c0 >> 2),
466 (0x0e00 << 16) | (0x92c4 >> 2),
468 (0x0e00 << 16) | (0x92c8 >> 2),
470 (0x0e00 << 16) | (0x92cc >> 2),
472 (0x0e00 << 16) | (0x92d0 >> 2),
474 (0x0e00 << 16) | (0x8c00 >> 2),
476 (0x0e00 << 16) | (0x8c04 >> 2),
478 (0x0e00 << 16) | (0x8c20 >> 2),
480 (0x0e00 << 16) | (0x8c38 >> 2),
482 (0x0e00 << 16) | (0x8c3c >> 2),
484 (0x0e00 << 16) | (0xae00 >> 2),
486 (0x0e00 << 16) | (0x9604 >> 2),
488 (0x0e00 << 16) | (0xac08 >> 2),
490 (0x0e00 << 16) | (0xac0c >> 2),
492 (0x0e00 << 16) | (0xac10 >> 2),
494 (0x0e00 << 16) | (0xac14 >> 2),
496 (0x0e00 << 16) | (0xac58 >> 2),
498 (0x0e00 << 16) | (0xac68 >> 2),
500 (0x0e00 << 16) | (0xac6c >> 2),
502 (0x0e00 << 16) | (0xac70 >> 2),
504 (0x0e00 << 16) | (0xac74 >> 2),
506 (0x0e00 << 16) | (0xac78 >> 2),
508 (0x0e00 << 16) | (0xac7c >> 2),
510 (0x0e00 << 16) | (0xac80 >> 2),
512 (0x0e00 << 16) | (0xac84 >> 2),
514 (0x0e00 << 16) | (0xac88 >> 2),
516 (0x0e00 << 16) | (0xac8c >> 2),
518 (0x0e00 << 16) | (0x970c >> 2),
520 (0x0e00 << 16) | (0x9714 >> 2),
522 (0x0e00 << 16) | (0x9718 >> 2),
524 (0x0e00 << 16) | (0x971c >> 2),
526 (0x0e00 << 16) | (0x31068 >> 2),
528 (0x4e00 << 16) | (0x31068 >> 2),
530 (0x5e00 << 16) | (0x31068 >> 2),
532 (0x6e00 << 16) | (0x31068 >> 2),
534 (0x7e00 << 16) | (0x31068 >> 2),
536 (0x8e00 << 16) | (0x31068 >> 2),
538 (0x9e00 << 16) | (0x31068 >> 2),
540 (0xae00 << 16) | (0x31068 >> 2),
542 (0xbe00 << 16) | (0x31068 >> 2),
544 (0x0e00 << 16) | (0xcd10 >> 2),
546 (0x0e00 << 16) | (0xcd14 >> 2),
548 (0x0e00 << 16) | (0x88b0 >> 2),
550 (0x0e00 << 16) | (0x88b4 >> 2),
552 (0x0e00 << 16) | (0x88b8 >> 2),
554 (0x0e00 << 16) | (0x88bc >> 2),
556 (0x0400 << 16) | (0x89c0 >> 2),
558 (0x0e00 << 16) | (0x88c4 >> 2),
560 (0x0e00 << 16) | (0x88c8 >> 2),
562 (0x0e00 << 16) | (0x88d0 >> 2),
564 (0x0e00 << 16) | (0x88d4 >> 2),
566 (0x0e00 << 16) | (0x88d8 >> 2),
568 (0x0e00 << 16) | (0x8980 >> 2),
570 (0x0e00 << 16) | (0x30938 >> 2),
572 (0x0e00 << 16) | (0x3093c >> 2),
574 (0x0e00 << 16) | (0x30940 >> 2),
576 (0x0e00 << 16) | (0x89a0 >> 2),
578 (0x0e00 << 16) | (0x30900 >> 2),
580 (0x0e00 << 16) | (0x30904 >> 2),
582 (0x0e00 << 16) | (0x89b4 >> 2),
584 (0x0e00 << 16) | (0x3c210 >> 2),
586 (0x0e00 << 16) | (0x3c214 >> 2),
588 (0x0e00 << 16) | (0x3c218 >> 2),
590 (0x0e00 << 16) | (0x8904 >> 2),
593 (0x0e00 << 16) | (0x8c28 >> 2),
594 (0x0e00 << 16) | (0x8c2c >> 2),
595 (0x0e00 << 16) | (0x8c30 >> 2),
596 (0x0e00 << 16) | (0x8c34 >> 2),
597 (0x0e00 << 16) | (0x9600 >> 2),
/*
 * RLC save/restore register list for Kalindi (Kabini GFX).  Same entry
 * format as the Spectre list above: (instance/broadcast select << 16) |
 * (dword register offset); smaller because Kalindi has fewer shader
 * engine instances.
 * NOTE(review): array braces are elided in this extraction — do not edit
 * entries.
 */
600 static const u32 kalindi_rlc_save_restore_register_list[] =
602 (0x0e00 << 16) | (0xc12c >> 2),
604 (0x0e00 << 16) | (0xc140 >> 2),
606 (0x0e00 << 16) | (0xc150 >> 2),
608 (0x0e00 << 16) | (0xc15c >> 2),
610 (0x0e00 << 16) | (0xc168 >> 2),
612 (0x0e00 << 16) | (0xc170 >> 2),
614 (0x0e00 << 16) | (0xc204 >> 2),
616 (0x0e00 << 16) | (0xc2b4 >> 2),
618 (0x0e00 << 16) | (0xc2b8 >> 2),
620 (0x0e00 << 16) | (0xc2bc >> 2),
622 (0x0e00 << 16) | (0xc2c0 >> 2),
624 (0x0e00 << 16) | (0x8228 >> 2),
626 (0x0e00 << 16) | (0x829c >> 2),
628 (0x0e00 << 16) | (0x869c >> 2),
630 (0x0600 << 16) | (0x98f4 >> 2),
632 (0x0e00 << 16) | (0x98f8 >> 2),
634 (0x0e00 << 16) | (0x9900 >> 2),
636 (0x0e00 << 16) | (0xc260 >> 2),
638 (0x0e00 << 16) | (0x90e8 >> 2),
640 (0x0e00 << 16) | (0x3c000 >> 2),
642 (0x0e00 << 16) | (0x3c00c >> 2),
644 (0x0e00 << 16) | (0x8c1c >> 2),
646 (0x0e00 << 16) | (0x9700 >> 2),
648 (0x0e00 << 16) | (0xcd20 >> 2),
650 (0x4e00 << 16) | (0xcd20 >> 2),
652 (0x5e00 << 16) | (0xcd20 >> 2),
654 (0x6e00 << 16) | (0xcd20 >> 2),
656 (0x7e00 << 16) | (0xcd20 >> 2),
658 (0x0e00 << 16) | (0x89bc >> 2),
660 (0x0e00 << 16) | (0x8900 >> 2),
663 (0x0e00 << 16) | (0xc130 >> 2),
665 (0x0e00 << 16) | (0xc134 >> 2),
667 (0x0e00 << 16) | (0xc1fc >> 2),
669 (0x0e00 << 16) | (0xc208 >> 2),
671 (0x0e00 << 16) | (0xc264 >> 2),
673 (0x0e00 << 16) | (0xc268 >> 2),
675 (0x0e00 << 16) | (0xc26c >> 2),
677 (0x0e00 << 16) | (0xc270 >> 2),
679 (0x0e00 << 16) | (0xc274 >> 2),
681 (0x0e00 << 16) | (0xc28c >> 2),
683 (0x0e00 << 16) | (0xc290 >> 2),
685 (0x0e00 << 16) | (0xc294 >> 2),
687 (0x0e00 << 16) | (0xc298 >> 2),
689 (0x0e00 << 16) | (0xc2a0 >> 2),
691 (0x0e00 << 16) | (0xc2a4 >> 2),
693 (0x0e00 << 16) | (0xc2a8 >> 2),
695 (0x0e00 << 16) | (0xc2ac >> 2),
697 (0x0e00 << 16) | (0x301d0 >> 2),
699 (0x0e00 << 16) | (0x30238 >> 2),
701 (0x0e00 << 16) | (0x30250 >> 2),
703 (0x0e00 << 16) | (0x30254 >> 2),
705 (0x0e00 << 16) | (0x30258 >> 2),
707 (0x0e00 << 16) | (0x3025c >> 2),
709 (0x4e00 << 16) | (0xc900 >> 2),
711 (0x5e00 << 16) | (0xc900 >> 2),
713 (0x6e00 << 16) | (0xc900 >> 2),
715 (0x7e00 << 16) | (0xc900 >> 2),
717 (0x4e00 << 16) | (0xc904 >> 2),
719 (0x5e00 << 16) | (0xc904 >> 2),
721 (0x6e00 << 16) | (0xc904 >> 2),
723 (0x7e00 << 16) | (0xc904 >> 2),
725 (0x4e00 << 16) | (0xc908 >> 2),
727 (0x5e00 << 16) | (0xc908 >> 2),
729 (0x6e00 << 16) | (0xc908 >> 2),
731 (0x7e00 << 16) | (0xc908 >> 2),
733 (0x4e00 << 16) | (0xc90c >> 2),
735 (0x5e00 << 16) | (0xc90c >> 2),
737 (0x6e00 << 16) | (0xc90c >> 2),
739 (0x7e00 << 16) | (0xc90c >> 2),
741 (0x4e00 << 16) | (0xc910 >> 2),
743 (0x5e00 << 16) | (0xc910 >> 2),
745 (0x6e00 << 16) | (0xc910 >> 2),
747 (0x7e00 << 16) | (0xc910 >> 2),
749 (0x0e00 << 16) | (0xc99c >> 2),
751 (0x0e00 << 16) | (0x9834 >> 2),
753 (0x0000 << 16) | (0x30f00 >> 2),
755 (0x0000 << 16) | (0x30f04 >> 2),
757 (0x0000 << 16) | (0x30f08 >> 2),
759 (0x0000 << 16) | (0x30f0c >> 2),
761 (0x0600 << 16) | (0x9b7c >> 2),
763 (0x0e00 << 16) | (0x8a14 >> 2),
765 (0x0e00 << 16) | (0x8a18 >> 2),
767 (0x0600 << 16) | (0x30a00 >> 2),
769 (0x0e00 << 16) | (0x8bf0 >> 2),
771 (0x0e00 << 16) | (0x8bcc >> 2),
773 (0x0e00 << 16) | (0x8b24 >> 2),
775 (0x0e00 << 16) | (0x30a04 >> 2),
777 (0x0600 << 16) | (0x30a10 >> 2),
779 (0x0600 << 16) | (0x30a14 >> 2),
781 (0x0600 << 16) | (0x30a18 >> 2),
783 (0x0600 << 16) | (0x30a2c >> 2),
785 (0x0e00 << 16) | (0xc700 >> 2),
787 (0x0e00 << 16) | (0xc704 >> 2),
789 (0x0e00 << 16) | (0xc708 >> 2),
791 (0x0e00 << 16) | (0xc768 >> 2),
793 (0x0400 << 16) | (0xc770 >> 2),
795 (0x0400 << 16) | (0xc774 >> 2),
797 (0x0400 << 16) | (0xc798 >> 2),
799 (0x0400 << 16) | (0xc79c >> 2),
801 (0x0e00 << 16) | (0x9100 >> 2),
803 (0x0e00 << 16) | (0x3c010 >> 2),
805 (0x0e00 << 16) | (0x8c00 >> 2),
807 (0x0e00 << 16) | (0x8c04 >> 2),
809 (0x0e00 << 16) | (0x8c20 >> 2),
811 (0x0e00 << 16) | (0x8c38 >> 2),
813 (0x0e00 << 16) | (0x8c3c >> 2),
815 (0x0e00 << 16) | (0xae00 >> 2),
817 (0x0e00 << 16) | (0x9604 >> 2),
819 (0x0e00 << 16) | (0xac08 >> 2),
821 (0x0e00 << 16) | (0xac0c >> 2),
823 (0x0e00 << 16) | (0xac10 >> 2),
825 (0x0e00 << 16) | (0xac14 >> 2),
827 (0x0e00 << 16) | (0xac58 >> 2),
829 (0x0e00 << 16) | (0xac68 >> 2),
831 (0x0e00 << 16) | (0xac6c >> 2),
833 (0x0e00 << 16) | (0xac70 >> 2),
835 (0x0e00 << 16) | (0xac74 >> 2),
837 (0x0e00 << 16) | (0xac78 >> 2),
839 (0x0e00 << 16) | (0xac7c >> 2),
841 (0x0e00 << 16) | (0xac80 >> 2),
843 (0x0e00 << 16) | (0xac84 >> 2),
845 (0x0e00 << 16) | (0xac88 >> 2),
847 (0x0e00 << 16) | (0xac8c >> 2),
849 (0x0e00 << 16) | (0x970c >> 2),
851 (0x0e00 << 16) | (0x9714 >> 2),
853 (0x0e00 << 16) | (0x9718 >> 2),
855 (0x0e00 << 16) | (0x971c >> 2),
857 (0x0e00 << 16) | (0x31068 >> 2),
859 (0x4e00 << 16) | (0x31068 >> 2),
861 (0x5e00 << 16) | (0x31068 >> 2),
863 (0x6e00 << 16) | (0x31068 >> 2),
865 (0x7e00 << 16) | (0x31068 >> 2),
867 (0x0e00 << 16) | (0xcd10 >> 2),
869 (0x0e00 << 16) | (0xcd14 >> 2),
871 (0x0e00 << 16) | (0x88b0 >> 2),
873 (0x0e00 << 16) | (0x88b4 >> 2),
875 (0x0e00 << 16) | (0x88b8 >> 2),
877 (0x0e00 << 16) | (0x88bc >> 2),
879 (0x0400 << 16) | (0x89c0 >> 2),
881 (0x0e00 << 16) | (0x88c4 >> 2),
883 (0x0e00 << 16) | (0x88c8 >> 2),
885 (0x0e00 << 16) | (0x88d0 >> 2),
887 (0x0e00 << 16) | (0x88d4 >> 2),
889 (0x0e00 << 16) | (0x88d8 >> 2),
891 (0x0e00 << 16) | (0x8980 >> 2),
893 (0x0e00 << 16) | (0x30938 >> 2),
895 (0x0e00 << 16) | (0x3093c >> 2),
897 (0x0e00 << 16) | (0x30940 >> 2),
899 (0x0e00 << 16) | (0x89a0 >> 2),
901 (0x0e00 << 16) | (0x30900 >> 2),
903 (0x0e00 << 16) | (0x30904 >> 2),
905 (0x0e00 << 16) | (0x89b4 >> 2),
907 (0x0e00 << 16) | (0x3e1fc >> 2),
909 (0x0e00 << 16) | (0x3c210 >> 2),
911 (0x0e00 << 16) | (0x3c214 >> 2),
913 (0x0e00 << 16) | (0x3c218 >> 2),
915 (0x0e00 << 16) | (0x8904 >> 2),
918 (0x0e00 << 16) | (0x8c28 >> 2),
919 (0x0e00 << 16) | (0x8c2c >> 2),
920 (0x0e00 << 16) | (0x8c30 >> 2),
921 (0x0e00 << 16) | (0x8c34 >> 2),
922 (0x0e00 << 16) | (0x9600 >> 2),
/*
 * Bonaire "golden" SPM register setting, {offset, mask, value} triplet.
 * NOTE(review): array braces are elided in this extraction.
 */
925 static const u32 bonaire_golden_spm_registers[] =
927 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Bonaire golden register settings common with the other CIK parts,
 * {offset, mask, value} triplets.  NOTE(review): array braces are elided
 * in this extraction.
 */
930 static const u32 bonaire_golden_common_registers[] =
932 0xc770, 0xffffffff, 0x00000800,
933 0xc774, 0xffffffff, 0x00000800,
934 0xc798, 0xffffffff, 0x00007fbf,
935 0xc79c, 0xffffffff, 0x00007faf
/*
 * Bonaire-specific golden (recommended power-on) register settings,
 * {offset, mask, value} triplets applied at init.
 * NOTE(review): values come from AMD; array braces are elided in this
 * extraction — do not edit entries.
 */
938 static const u32 bonaire_golden_registers[] =
940 0x3354, 0x00000333, 0x00000333,
941 0x3350, 0x000c0fc0, 0x00040200,
942 0x9a10, 0x00010000, 0x00058208,
943 0x3c000, 0xffff1fff, 0x00140000,
944 0x3c200, 0xfdfc0fff, 0x00000100,
945 0x3c234, 0x40000000, 0x40000200,
946 0x9830, 0xffffffff, 0x00000000,
947 0x9834, 0xf00fffff, 0x00000400,
948 0x9838, 0x0002021c, 0x00020200,
949 0xc78, 0x00000080, 0x00000000,
950 0x5bb0, 0x000000f0, 0x00000070,
951 0x5bc0, 0xf0311fff, 0x80300000,
952 0x98f8, 0x73773777, 0x12010001,
953 0x350c, 0x00810000, 0x408af000,
954 0x7030, 0x31000111, 0x00000011,
955 0x2f48, 0x73773777, 0x12010001,
956 0x220c, 0x00007fb6, 0x0021a1b1,
957 0x2210, 0x00007fb6, 0x002021b1,
958 0x2180, 0x00007fb6, 0x00002191,
959 0x2218, 0x00007fb6, 0x002121b1,
960 0x221c, 0x00007fb6, 0x002021b1,
961 0x21dc, 0x00007fb6, 0x00002191,
962 0x21e0, 0x00007fb6, 0x00002191,
963 0x3628, 0x0000003f, 0x0000000a,
964 0x362c, 0x0000003f, 0x0000000a,
965 0x2ae4, 0x00073ffe, 0x000022a2,
966 0x240c, 0x000007ff, 0x00000000,
967 0x8a14, 0xf000003f, 0x00000007,
968 0x8bf0, 0x00002001, 0x00000001,
969 0x8b24, 0xffffffff, 0x00ffffff,
970 0x30a04, 0x0000ff0f, 0x00000000,
971 0x28a4c, 0x07ffffff, 0x06000000,
972 0x4d8, 0x00000fff, 0x00000100,
973 0x3e78, 0x00000001, 0x00000002,
974 0x9100, 0x03000000, 0x0362c688,
975 0x8c00, 0x000000ff, 0x00000001,
976 0xe40, 0x00001fff, 0x00001fff,
977 0x9060, 0x0000007f, 0x00000020,
978 0x9508, 0x00010000, 0x00010000,
979 0xac14, 0x000003ff, 0x000000f3,
980 0xac0c, 0xffffffff, 0x00001032
/*
 * Bonaire medium-grain / coarse-grain clock-gating (MGCG/CGCG) init
 * table, {offset, mask, value} triplets.
 * NOTE(review): values come from AMD; array braces are elided in this
 * extraction — do not edit entries.
 */
983 static const u32 bonaire_mgcg_cgcg_init[] =
985 0xc420, 0xffffffff, 0xfffffffc,
986 0x30800, 0xffffffff, 0xe0000000,
987 0x3c2a0, 0xffffffff, 0x00000100,
988 0x3c208, 0xffffffff, 0x00000100,
989 0x3c2c0, 0xffffffff, 0xc0000100,
990 0x3c2c8, 0xffffffff, 0xc0000100,
991 0x3c2c4, 0xffffffff, 0xc0000100,
992 0x55e4, 0xffffffff, 0x00600100,
993 0x3c280, 0xffffffff, 0x00000100,
994 0x3c214, 0xffffffff, 0x06000100,
995 0x3c220, 0xffffffff, 0x00000100,
996 0x3c218, 0xffffffff, 0x06000100,
997 0x3c204, 0xffffffff, 0x00000100,
998 0x3c2e0, 0xffffffff, 0x00000100,
999 0x3c224, 0xffffffff, 0x00000100,
1000 0x3c200, 0xffffffff, 0x00000100,
1001 0x3c230, 0xffffffff, 0x00000100,
1002 0x3c234, 0xffffffff, 0x00000100,
1003 0x3c250, 0xffffffff, 0x00000100,
1004 0x3c254, 0xffffffff, 0x00000100,
1005 0x3c258, 0xffffffff, 0x00000100,
1006 0x3c25c, 0xffffffff, 0x00000100,
1007 0x3c260, 0xffffffff, 0x00000100,
1008 0x3c27c, 0xffffffff, 0x00000100,
1009 0x3c278, 0xffffffff, 0x00000100,
1010 0x3c210, 0xffffffff, 0x06000100,
1011 0x3c290, 0xffffffff, 0x00000100,
1012 0x3c274, 0xffffffff, 0x00000100,
1013 0x3c2b4, 0xffffffff, 0x00000100,
1014 0x3c2b0, 0xffffffff, 0x00000100,
1015 0x3c270, 0xffffffff, 0x00000100,
1016 0x30800, 0xffffffff, 0xe0000000,
1017 0x3c020, 0xffffffff, 0x00010000,
1018 0x3c024, 0xffffffff, 0x00030002,
1019 0x3c028, 0xffffffff, 0x00040007,
1020 0x3c02c, 0xffffffff, 0x00060005,
1021 0x3c030, 0xffffffff, 0x00090008,
1022 0x3c034, 0xffffffff, 0x00010000,
1023 0x3c038, 0xffffffff, 0x00030002,
1024 0x3c03c, 0xffffffff, 0x00040007,
1025 0x3c040, 0xffffffff, 0x00060005,
1026 0x3c044, 0xffffffff, 0x00090008,
1027 0x3c048, 0xffffffff, 0x00010000,
1028 0x3c04c, 0xffffffff, 0x00030002,
1029 0x3c050, 0xffffffff, 0x00040007,
1030 0x3c054, 0xffffffff, 0x00060005,
1031 0x3c058, 0xffffffff, 0x00090008,
1032 0x3c05c, 0xffffffff, 0x00010000,
1033 0x3c060, 0xffffffff, 0x00030002,
1034 0x3c064, 0xffffffff, 0x00040007,
1035 0x3c068, 0xffffffff, 0x00060005,
1036 0x3c06c, 0xffffffff, 0x00090008,
1037 0x3c070, 0xffffffff, 0x00010000,
1038 0x3c074, 0xffffffff, 0x00030002,
1039 0x3c078, 0xffffffff, 0x00040007,
1040 0x3c07c, 0xffffffff, 0x00060005,
1041 0x3c080, 0xffffffff, 0x00090008,
1042 0x3c084, 0xffffffff, 0x00010000,
1043 0x3c088, 0xffffffff, 0x00030002,
1044 0x3c08c, 0xffffffff, 0x00040007,
1045 0x3c090, 0xffffffff, 0x00060005,
1046 0x3c094, 0xffffffff, 0x00090008,
1047 0x3c098, 0xffffffff, 0x00010000,
1048 0x3c09c, 0xffffffff, 0x00030002,
1049 0x3c0a0, 0xffffffff, 0x00040007,
1050 0x3c0a4, 0xffffffff, 0x00060005,
1051 0x3c0a8, 0xffffffff, 0x00090008,
1052 0x3c000, 0xffffffff, 0x96e00200,
1053 0x8708, 0xffffffff, 0x00900100,
1054 0xc424, 0xffffffff, 0x0020003f,
1055 0x38, 0xffffffff, 0x0140001c,
1056 0x3c, 0x000f0000, 0x000f0000,
1057 0x220, 0xffffffff, 0xC060000C,
1058 0x224, 0xc0000fff, 0x00000100,
1059 0xf90, 0xffffffff, 0x00000100,
1060 0xf98, 0x00000101, 0x00000000,
1061 0x20a8, 0xffffffff, 0x00000104,
1062 0x55e4, 0xff000fff, 0x00000100,
1063 0x30cc, 0xc0000fff, 0x00000104,
1064 0xc1e4, 0x00000001, 0x00000001,
1065 0xd00c, 0xff000ff0, 0x00000100,
1066 0xd80c, 0xff000ff0, 0x00000100
/*
 * Spectre (Kaveri) golden SPM register setting, {offset, mask, value}.
 * NOTE(review): array braces are elided in this extraction.
 */
1069 static const u32 spectre_golden_spm_registers[] =
1071 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Spectre (Kaveri) golden common register settings, {offset, mask,
 * value} triplets.  NOTE(review): array braces are elided in this
 * extraction.
 */
1074 static const u32 spectre_golden_common_registers[] =
1076 0xc770, 0xffffffff, 0x00000800,
1077 0xc774, 0xffffffff, 0x00000800,
1078 0xc798, 0xffffffff, 0x00007fbf,
1079 0xc79c, 0xffffffff, 0x00007faf
/*
 * Spectre (Kaveri)-specific golden register settings, {offset, mask,
 * value} triplets applied at init.
 * NOTE(review): values come from AMD; array braces are elided in this
 * extraction — do not edit entries.
 */
1082 static const u32 spectre_golden_registers[] =
1084 0x3c000, 0xffff1fff, 0x96940200,
1085 0x3c00c, 0xffff0001, 0xff000000,
1086 0x3c200, 0xfffc0fff, 0x00000100,
1087 0x6ed8, 0x00010101, 0x00010000,
1088 0x9834, 0xf00fffff, 0x00000400,
1089 0x9838, 0xfffffffc, 0x00020200,
1090 0x5bb0, 0x000000f0, 0x00000070,
1091 0x5bc0, 0xf0311fff, 0x80300000,
1092 0x98f8, 0x73773777, 0x12010001,
1093 0x9b7c, 0x00ff0000, 0x00fc0000,
1094 0x2f48, 0x73773777, 0x12010001,
1095 0x8a14, 0xf000003f, 0x00000007,
1096 0x8b24, 0xffffffff, 0x00ffffff,
1097 0x28350, 0x3f3f3fff, 0x00000082,
1098 0x28355, 0x0000003f, 0x00000000,
1099 0x3e78, 0x00000001, 0x00000002,
1100 0x913c, 0xffff03df, 0x00000004,
1101 0xc768, 0x00000008, 0x00000008,
1102 0x8c00, 0x000008ff, 0x00000800,
1103 0x9508, 0x00010000, 0x00010000,
1104 0xac0c, 0xffffffff, 0x54763210,
1105 0x214f8, 0x01ff01ff, 0x00000002,
1106 0x21498, 0x007ff800, 0x00200000,
1107 0x2015c, 0xffffffff, 0x00000f40,
1108 0x30934, 0xffffffff, 0x00000001
/*
 * Spectre (Kaveri) MGCG/CGCG clock-gating init table, {offset, mask,
 * value} triplets.
 * NOTE(review): values come from AMD; array braces are elided in this
 * extraction — do not edit entries.
 */
1111 static const u32 spectre_mgcg_cgcg_init[] =
1113 0xc420, 0xffffffff, 0xfffffffc,
1114 0x30800, 0xffffffff, 0xe0000000,
1115 0x3c2a0, 0xffffffff, 0x00000100,
1116 0x3c208, 0xffffffff, 0x00000100,
1117 0x3c2c0, 0xffffffff, 0x00000100,
1118 0x3c2c8, 0xffffffff, 0x00000100,
1119 0x3c2c4, 0xffffffff, 0x00000100,
1120 0x55e4, 0xffffffff, 0x00600100,
1121 0x3c280, 0xffffffff, 0x00000100,
1122 0x3c214, 0xffffffff, 0x06000100,
1123 0x3c220, 0xffffffff, 0x00000100,
1124 0x3c218, 0xffffffff, 0x06000100,
1125 0x3c204, 0xffffffff, 0x00000100,
1126 0x3c2e0, 0xffffffff, 0x00000100,
1127 0x3c224, 0xffffffff, 0x00000100,
1128 0x3c200, 0xffffffff, 0x00000100,
1129 0x3c230, 0xffffffff, 0x00000100,
1130 0x3c234, 0xffffffff, 0x00000100,
1131 0x3c250, 0xffffffff, 0x00000100,
1132 0x3c254, 0xffffffff, 0x00000100,
1133 0x3c258, 0xffffffff, 0x00000100,
1134 0x3c25c, 0xffffffff, 0x00000100,
1135 0x3c260, 0xffffffff, 0x00000100,
1136 0x3c27c, 0xffffffff, 0x00000100,
1137 0x3c278, 0xffffffff, 0x00000100,
1138 0x3c210, 0xffffffff, 0x06000100,
1139 0x3c290, 0xffffffff, 0x00000100,
1140 0x3c274, 0xffffffff, 0x00000100,
1141 0x3c2b4, 0xffffffff, 0x00000100,
1142 0x3c2b0, 0xffffffff, 0x00000100,
1143 0x3c270, 0xffffffff, 0x00000100,
1144 0x30800, 0xffffffff, 0xe0000000,
1145 0x3c020, 0xffffffff, 0x00010000,
1146 0x3c024, 0xffffffff, 0x00030002,
1147 0x3c028, 0xffffffff, 0x00040007,
1148 0x3c02c, 0xffffffff, 0x00060005,
1149 0x3c030, 0xffffffff, 0x00090008,
1150 0x3c034, 0xffffffff, 0x00010000,
1151 0x3c038, 0xffffffff, 0x00030002,
1152 0x3c03c, 0xffffffff, 0x00040007,
1153 0x3c040, 0xffffffff, 0x00060005,
1154 0x3c044, 0xffffffff, 0x00090008,
1155 0x3c048, 0xffffffff, 0x00010000,
1156 0x3c04c, 0xffffffff, 0x00030002,
1157 0x3c050, 0xffffffff, 0x00040007,
1158 0x3c054, 0xffffffff, 0x00060005,
1159 0x3c058, 0xffffffff, 0x00090008,
1160 0x3c05c, 0xffffffff, 0x00010000,
1161 0x3c060, 0xffffffff, 0x00030002,
1162 0x3c064, 0xffffffff, 0x00040007,
1163 0x3c068, 0xffffffff, 0x00060005,
1164 0x3c06c, 0xffffffff, 0x00090008,
1165 0x3c070, 0xffffffff, 0x00010000,
1166 0x3c074, 0xffffffff, 0x00030002,
1167 0x3c078, 0xffffffff, 0x00040007,
1168 0x3c07c, 0xffffffff, 0x00060005,
1169 0x3c080, 0xffffffff, 0x00090008,
1170 0x3c084, 0xffffffff, 0x00010000,
1171 0x3c088, 0xffffffff, 0x00030002,
1172 0x3c08c, 0xffffffff, 0x00040007,
1173 0x3c090, 0xffffffff, 0x00060005,
1174 0x3c094, 0xffffffff, 0x00090008,
1175 0x3c098, 0xffffffff, 0x00010000,
1176 0x3c09c, 0xffffffff, 0x00030002,
1177 0x3c0a0, 0xffffffff, 0x00040007,
1178 0x3c0a4, 0xffffffff, 0x00060005,
1179 0x3c0a8, 0xffffffff, 0x00090008,
1180 0x3c0ac, 0xffffffff, 0x00010000,
1181 0x3c0b0, 0xffffffff, 0x00030002,
1182 0x3c0b4, 0xffffffff, 0x00040007,
1183 0x3c0b8, 0xffffffff, 0x00060005,
1184 0x3c0bc, 0xffffffff, 0x00090008,
1185 0x3c000, 0xffffffff, 0x96e00200,
1186 0x8708, 0xffffffff, 0x00900100,
1187 0xc424, 0xffffffff, 0x0020003f,
1188 0x38, 0xffffffff, 0x0140001c,
1189 0x3c, 0x000f0000, 0x000f0000,
1190 0x220, 0xffffffff, 0xC060000C,
1191 0x224, 0xc0000fff, 0x00000100,
1192 0xf90, 0xffffffff, 0x00000100,
1193 0xf98, 0x00000101, 0x00000000,
1194 0x20a8, 0xffffffff, 0x00000104,
1195 0x55e4, 0xff000fff, 0x00000100,
1196 0x30cc, 0xc0000fff, 0x00000104,
1197 0xc1e4, 0x00000001, 0x00000001,
1198 0xd00c, 0xff000ff0, 0x00000100,
1199 0xd80c, 0xff000ff0, 0x00000100
/*
 * Kalindi (Kabini) golden SPM register setting, {offset, mask, value}.
 * NOTE(review): array braces are elided in this extraction.
 */
1202 static const u32 kalindi_golden_spm_registers[] =
1204 0x30800, 0xe0ffffff, 0xe0000000
/*
 * Kalindi (Kabini) golden common register settings, {offset, mask,
 * value} triplets.  NOTE(review): array braces are elided in this
 * extraction.
 */
1207 static const u32 kalindi_golden_common_registers[] =
1209 0xc770, 0xffffffff, 0x00000800,
1210 0xc774, 0xffffffff, 0x00000800,
1211 0xc798, 0xffffffff, 0x00007fbf,
1212 0xc79c, 0xffffffff, 0x00007faf
/*
 * Kalindi (Kabini)-specific golden register settings, {offset, mask,
 * value} triplets applied at init.
 * NOTE(review): values come from AMD; array braces are elided in this
 * extraction — do not edit entries.
 */
1215 static const u32 kalindi_golden_registers[] =
1217 0x3c000, 0xffffdfff, 0x6e944040,
1218 0x55e4, 0xff607fff, 0xfc000100,
1219 0x3c220, 0xff000fff, 0x00000100,
1220 0x3c224, 0xff000fff, 0x00000100,
1221 0x3c200, 0xfffc0fff, 0x00000100,
1222 0x6ed8, 0x00010101, 0x00010000,
1223 0x9830, 0xffffffff, 0x00000000,
1224 0x9834, 0xf00fffff, 0x00000400,
1225 0x5bb0, 0x000000f0, 0x00000070,
1226 0x5bc0, 0xf0311fff, 0x80300000,
1227 0x98f8, 0x73773777, 0x12010001,
1228 0x98fc, 0xffffffff, 0x00000010,
1229 0x9b7c, 0x00ff0000, 0x00fc0000,
1230 0x8030, 0x00001f0f, 0x0000100a,
1231 0x2f48, 0x73773777, 0x12010001,
1232 0x2408, 0x000fffff, 0x000c007f,
1233 0x8a14, 0xf000003f, 0x00000007,
1234 0x8b24, 0x3fff3fff, 0x00ffcfff,
1235 0x30a04, 0x0000ff0f, 0x00000000,
1236 0x28a4c, 0x07ffffff, 0x06000000,
1237 0x4d8, 0x00000fff, 0x00000100,
1238 0x3e78, 0x00000001, 0x00000002,
1239 0xc768, 0x00000008, 0x00000008,
1240 0x8c00, 0x000000ff, 0x00000003,
1241 0x214f8, 0x01ff01ff, 0x00000002,
1242 0x21498, 0x007ff800, 0x00200000,
1243 0x2015c, 0xffffffff, 0x00000f40,
1244 0x88c4, 0x001f3ae3, 0x00000082,
1245 0x88d4, 0x0000001f, 0x00000010,
1246 0x30934, 0xffffffff, 0x00000000
/*
 * Kalindi (Kabini) MGCG/CGCG clock-gating init table, {offset, mask,
 * value} triplets.
 * NOTE(review): values come from AMD; array braces are elided in this
 * extraction — do not edit entries.
 */
1249 static const u32 kalindi_mgcg_cgcg_init[] =
1251 0xc420, 0xffffffff, 0xfffffffc,
1252 0x30800, 0xffffffff, 0xe0000000,
1253 0x3c2a0, 0xffffffff, 0x00000100,
1254 0x3c208, 0xffffffff, 0x00000100,
1255 0x3c2c0, 0xffffffff, 0x00000100,
1256 0x3c2c8, 0xffffffff, 0x00000100,
1257 0x3c2c4, 0xffffffff, 0x00000100,
1258 0x55e4, 0xffffffff, 0x00600100,
1259 0x3c280, 0xffffffff, 0x00000100,
1260 0x3c214, 0xffffffff, 0x06000100,
1261 0x3c220, 0xffffffff, 0x00000100,
1262 0x3c218, 0xffffffff, 0x06000100,
1263 0x3c204, 0xffffffff, 0x00000100,
1264 0x3c2e0, 0xffffffff, 0x00000100,
1265 0x3c224, 0xffffffff, 0x00000100,
1266 0x3c200, 0xffffffff, 0x00000100,
1267 0x3c230, 0xffffffff, 0x00000100,
1268 0x3c234, 0xffffffff, 0x00000100,
1269 0x3c250, 0xffffffff, 0x00000100,
1270 0x3c254, 0xffffffff, 0x00000100,
1271 0x3c258, 0xffffffff, 0x00000100,
1272 0x3c25c, 0xffffffff, 0x00000100,
1273 0x3c260, 0xffffffff, 0x00000100,
1274 0x3c27c, 0xffffffff, 0x00000100,
1275 0x3c278, 0xffffffff, 0x00000100,
1276 0x3c210, 0xffffffff, 0x06000100,
1277 0x3c290, 0xffffffff, 0x00000100,
1278 0x3c274, 0xffffffff, 0x00000100,
1279 0x3c2b4, 0xffffffff, 0x00000100,
1280 0x3c2b0, 0xffffffff, 0x00000100,
1281 0x3c270, 0xffffffff, 0x00000100,
1282 0x30800, 0xffffffff, 0xe0000000,
1283 0x3c020, 0xffffffff, 0x00010000,
1284 0x3c024, 0xffffffff, 0x00030002,
1285 0x3c028, 0xffffffff, 0x00040007,
1286 0x3c02c, 0xffffffff, 0x00060005,
1287 0x3c030, 0xffffffff, 0x00090008,
1288 0x3c034, 0xffffffff, 0x00010000,
1289 0x3c038, 0xffffffff, 0x00030002,
1290 0x3c03c, 0xffffffff, 0x00040007,
1291 0x3c040, 0xffffffff, 0x00060005,
1292 0x3c044, 0xffffffff, 0x00090008,
1293 0x3c000, 0xffffffff, 0x96e00200,
1294 0x8708, 0xffffffff, 0x00900100,
1295 0xc424, 0xffffffff, 0x0020003f,
1296 0x38, 0xffffffff, 0x0140001c,
1297 0x3c, 0x000f0000, 0x000f0000,
1298 0x220, 0xffffffff, 0xC060000C,
1299 0x224, 0xc0000fff, 0x00000100,
1300 0x20a8, 0xffffffff, 0x00000104,
1301 0x55e4, 0xff000fff, 0x00000100,
1302 0x30cc, 0xc0000fff, 0x00000104,
1303 0xc1e4, 0x00000001, 0x00000001,
1304 0xd00c, 0xff000ff0, 0x00000100,
1305 0xd80c, 0xff000ff0, 0x00000100
1308 static const u32 hawaii_golden_spm_registers[] =
1310 0x30800, 0xe0ffffff, 0xe0000000
1313 static const u32 hawaii_golden_common_registers[] =
1315 0x30800, 0xffffffff, 0xe0000000,
1316 0x28350, 0xffffffff, 0x3a00161a,
1317 0x28354, 0xffffffff, 0x0000002e,
1318 0x9a10, 0xffffffff, 0x00018208,
1319 0x98f8, 0xffffffff, 0x12011003
1322 static const u32 hawaii_golden_registers[] =
1324 0x3354, 0x00000333, 0x00000333,
1325 0x9a10, 0x00010000, 0x00058208,
1326 0x9830, 0xffffffff, 0x00000000,
1327 0x9834, 0xf00fffff, 0x00000400,
1328 0x9838, 0x0002021c, 0x00020200,
1329 0xc78, 0x00000080, 0x00000000,
1330 0x5bb0, 0x000000f0, 0x00000070,
1331 0x5bc0, 0xf0311fff, 0x80300000,
1332 0x350c, 0x00810000, 0x408af000,
1333 0x7030, 0x31000111, 0x00000011,
1334 0x2f48, 0x73773777, 0x12010001,
1335 0x2120, 0x0000007f, 0x0000001b,
1336 0x21dc, 0x00007fb6, 0x00002191,
1337 0x3628, 0x0000003f, 0x0000000a,
1338 0x362c, 0x0000003f, 0x0000000a,
1339 0x2ae4, 0x00073ffe, 0x000022a2,
1340 0x240c, 0x000007ff, 0x00000000,
1341 0x8bf0, 0x00002001, 0x00000001,
1342 0x8b24, 0xffffffff, 0x00ffffff,
1343 0x30a04, 0x0000ff0f, 0x00000000,
1344 0x28a4c, 0x07ffffff, 0x06000000,
1345 0x3e78, 0x00000001, 0x00000002,
1346 0xc768, 0x00000008, 0x00000008,
1347 0xc770, 0x00000f00, 0x00000800,
1348 0xc774, 0x00000f00, 0x00000800,
1349 0xc798, 0x00ffffff, 0x00ff7fbf,
1350 0xc79c, 0x00ffffff, 0x00ff7faf,
1351 0x8c00, 0x000000ff, 0x00000800,
1352 0xe40, 0x00001fff, 0x00001fff,
1353 0x9060, 0x0000007f, 0x00000020,
1354 0x9508, 0x00010000, 0x00010000,
1355 0xae00, 0x00100000, 0x000ff07c,
1356 0xac14, 0x000003ff, 0x0000000f,
1357 0xac10, 0xffffffff, 0x7564fdec,
1358 0xac0c, 0xffffffff, 0x3120b9a8,
1359 0xac08, 0x20000000, 0x0f9c0000
1362 static const u32 hawaii_mgcg_cgcg_init[] =
1364 0xc420, 0xffffffff, 0xfffffffd,
1365 0x30800, 0xffffffff, 0xe0000000,
1366 0x3c2a0, 0xffffffff, 0x00000100,
1367 0x3c208, 0xffffffff, 0x00000100,
1368 0x3c2c0, 0xffffffff, 0x00000100,
1369 0x3c2c8, 0xffffffff, 0x00000100,
1370 0x3c2c4, 0xffffffff, 0x00000100,
1371 0x55e4, 0xffffffff, 0x00200100,
1372 0x3c280, 0xffffffff, 0x00000100,
1373 0x3c214, 0xffffffff, 0x06000100,
1374 0x3c220, 0xffffffff, 0x00000100,
1375 0x3c218, 0xffffffff, 0x06000100,
1376 0x3c204, 0xffffffff, 0x00000100,
1377 0x3c2e0, 0xffffffff, 0x00000100,
1378 0x3c224, 0xffffffff, 0x00000100,
1379 0x3c200, 0xffffffff, 0x00000100,
1380 0x3c230, 0xffffffff, 0x00000100,
1381 0x3c234, 0xffffffff, 0x00000100,
1382 0x3c250, 0xffffffff, 0x00000100,
1383 0x3c254, 0xffffffff, 0x00000100,
1384 0x3c258, 0xffffffff, 0x00000100,
1385 0x3c25c, 0xffffffff, 0x00000100,
1386 0x3c260, 0xffffffff, 0x00000100,
1387 0x3c27c, 0xffffffff, 0x00000100,
1388 0x3c278, 0xffffffff, 0x00000100,
1389 0x3c210, 0xffffffff, 0x06000100,
1390 0x3c290, 0xffffffff, 0x00000100,
1391 0x3c274, 0xffffffff, 0x00000100,
1392 0x3c2b4, 0xffffffff, 0x00000100,
1393 0x3c2b0, 0xffffffff, 0x00000100,
1394 0x3c270, 0xffffffff, 0x00000100,
1395 0x30800, 0xffffffff, 0xe0000000,
1396 0x3c020, 0xffffffff, 0x00010000,
1397 0x3c024, 0xffffffff, 0x00030002,
1398 0x3c028, 0xffffffff, 0x00040007,
1399 0x3c02c, 0xffffffff, 0x00060005,
1400 0x3c030, 0xffffffff, 0x00090008,
1401 0x3c034, 0xffffffff, 0x00010000,
1402 0x3c038, 0xffffffff, 0x00030002,
1403 0x3c03c, 0xffffffff, 0x00040007,
1404 0x3c040, 0xffffffff, 0x00060005,
1405 0x3c044, 0xffffffff, 0x00090008,
1406 0x3c048, 0xffffffff, 0x00010000,
1407 0x3c04c, 0xffffffff, 0x00030002,
1408 0x3c050, 0xffffffff, 0x00040007,
1409 0x3c054, 0xffffffff, 0x00060005,
1410 0x3c058, 0xffffffff, 0x00090008,
1411 0x3c05c, 0xffffffff, 0x00010000,
1412 0x3c060, 0xffffffff, 0x00030002,
1413 0x3c064, 0xffffffff, 0x00040007,
1414 0x3c068, 0xffffffff, 0x00060005,
1415 0x3c06c, 0xffffffff, 0x00090008,
1416 0x3c070, 0xffffffff, 0x00010000,
1417 0x3c074, 0xffffffff, 0x00030002,
1418 0x3c078, 0xffffffff, 0x00040007,
1419 0x3c07c, 0xffffffff, 0x00060005,
1420 0x3c080, 0xffffffff, 0x00090008,
1421 0x3c084, 0xffffffff, 0x00010000,
1422 0x3c088, 0xffffffff, 0x00030002,
1423 0x3c08c, 0xffffffff, 0x00040007,
1424 0x3c090, 0xffffffff, 0x00060005,
1425 0x3c094, 0xffffffff, 0x00090008,
1426 0x3c098, 0xffffffff, 0x00010000,
1427 0x3c09c, 0xffffffff, 0x00030002,
1428 0x3c0a0, 0xffffffff, 0x00040007,
1429 0x3c0a4, 0xffffffff, 0x00060005,
1430 0x3c0a8, 0xffffffff, 0x00090008,
1431 0x3c0ac, 0xffffffff, 0x00010000,
1432 0x3c0b0, 0xffffffff, 0x00030002,
1433 0x3c0b4, 0xffffffff, 0x00040007,
1434 0x3c0b8, 0xffffffff, 0x00060005,
1435 0x3c0bc, 0xffffffff, 0x00090008,
1436 0x3c0c0, 0xffffffff, 0x00010000,
1437 0x3c0c4, 0xffffffff, 0x00030002,
1438 0x3c0c8, 0xffffffff, 0x00040007,
1439 0x3c0cc, 0xffffffff, 0x00060005,
1440 0x3c0d0, 0xffffffff, 0x00090008,
1441 0x3c0d4, 0xffffffff, 0x00010000,
1442 0x3c0d8, 0xffffffff, 0x00030002,
1443 0x3c0dc, 0xffffffff, 0x00040007,
1444 0x3c0e0, 0xffffffff, 0x00060005,
1445 0x3c0e4, 0xffffffff, 0x00090008,
1446 0x3c0e8, 0xffffffff, 0x00010000,
1447 0x3c0ec, 0xffffffff, 0x00030002,
1448 0x3c0f0, 0xffffffff, 0x00040007,
1449 0x3c0f4, 0xffffffff, 0x00060005,
1450 0x3c0f8, 0xffffffff, 0x00090008,
1451 0xc318, 0xffffffff, 0x00020200,
1452 0x3350, 0xffffffff, 0x00000200,
1453 0x15c0, 0xffffffff, 0x00000400,
1454 0x55e8, 0xffffffff, 0x00000000,
1455 0x2f50, 0xffffffff, 0x00000902,
1456 0x3c000, 0xffffffff, 0x96940200,
1457 0x8708, 0xffffffff, 0x00900100,
1458 0xc424, 0xffffffff, 0x0020003f,
1459 0x38, 0xffffffff, 0x0140001c,
1460 0x3c, 0x000f0000, 0x000f0000,
1461 0x220, 0xffffffff, 0xc060000c,
1462 0x224, 0xc0000fff, 0x00000100,
1463 0xf90, 0xffffffff, 0x00000100,
1464 0xf98, 0x00000101, 0x00000000,
1465 0x20a8, 0xffffffff, 0x00000104,
1466 0x55e4, 0xff000fff, 0x00000100,
1467 0x30cc, 0xc0000fff, 0x00000104,
1468 0xc1e4, 0x00000001, 0x00000001,
1469 0xd00c, 0xff000ff0, 0x00000100,
1470 0xd80c, 0xff000ff0, 0x00000100
1473 static void cik_init_golden_registers(struct radeon_device *rdev)
1475 switch (rdev->family) {
1477 radeon_program_register_sequence(rdev,
1478 bonaire_mgcg_cgcg_init,
1479 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1480 radeon_program_register_sequence(rdev,
1481 bonaire_golden_registers,
1482 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1483 radeon_program_register_sequence(rdev,
1484 bonaire_golden_common_registers,
1485 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1486 radeon_program_register_sequence(rdev,
1487 bonaire_golden_spm_registers,
1488 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1491 radeon_program_register_sequence(rdev,
1492 kalindi_mgcg_cgcg_init,
1493 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1494 radeon_program_register_sequence(rdev,
1495 kalindi_golden_registers,
1496 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1497 radeon_program_register_sequence(rdev,
1498 kalindi_golden_common_registers,
1499 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1500 radeon_program_register_sequence(rdev,
1501 kalindi_golden_spm_registers,
1502 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1505 radeon_program_register_sequence(rdev,
1506 spectre_mgcg_cgcg_init,
1507 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1508 radeon_program_register_sequence(rdev,
1509 spectre_golden_registers,
1510 (const u32)ARRAY_SIZE(spectre_golden_registers));
1511 radeon_program_register_sequence(rdev,
1512 spectre_golden_common_registers,
1513 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1514 radeon_program_register_sequence(rdev,
1515 spectre_golden_spm_registers,
1516 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1519 radeon_program_register_sequence(rdev,
1520 hawaii_mgcg_cgcg_init,
1521 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1522 radeon_program_register_sequence(rdev,
1523 hawaii_golden_registers,
1524 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1525 radeon_program_register_sequence(rdev,
1526 hawaii_golden_common_registers,
1527 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1528 radeon_program_register_sequence(rdev,
1529 hawaii_golden_spm_registers,
1530 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1538 * cik_get_xclk - get the xclk
1540 * @rdev: radeon_device pointer
1542 * Returns the reference clock used by the gfx engine
1545 u32 cik_get_xclk(struct radeon_device *rdev)
1547 u32 reference_clock = rdev->clock.spll.reference_freq;
1549 if (rdev->flags & RADEON_IS_IGP) {
1550 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1551 return reference_clock / 2;
1553 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1554 return reference_clock / 4;
1556 return reference_clock;
1560 * cik_mm_rdoorbell - read a doorbell dword
1562 * @rdev: radeon_device pointer
1563 * @index: doorbell index
1565 * Returns the value in the doorbell aperture at the
1566 * requested doorbell index (CIK).
1568 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1570 if (index < rdev->doorbell.num_doorbells) {
1571 return readl(rdev->doorbell.ptr + index);
1573 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1579 * cik_mm_wdoorbell - write a doorbell dword
1581 * @rdev: radeon_device pointer
1582 * @index: doorbell index
1583 * @v: value to write
1585 * Writes @v to the doorbell aperture at the
1586 * requested doorbell index (CIK).
1588 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1590 if (index < rdev->doorbell.num_doorbells) {
1591 writel(v, rdev->doorbell.ptr + index);
1593 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1597 #define BONAIRE_IO_MC_REGS_SIZE 36
1599 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1601 {0x00000070, 0x04400000},
1602 {0x00000071, 0x80c01803},
1603 {0x00000072, 0x00004004},
1604 {0x00000073, 0x00000100},
1605 {0x00000074, 0x00ff0000},
1606 {0x00000075, 0x34000000},
1607 {0x00000076, 0x08000014},
1608 {0x00000077, 0x00cc08ec},
1609 {0x00000078, 0x00000400},
1610 {0x00000079, 0x00000000},
1611 {0x0000007a, 0x04090000},
1612 {0x0000007c, 0x00000000},
1613 {0x0000007e, 0x4408a8e8},
1614 {0x0000007f, 0x00000304},
1615 {0x00000080, 0x00000000},
1616 {0x00000082, 0x00000001},
1617 {0x00000083, 0x00000002},
1618 {0x00000084, 0xf3e4f400},
1619 {0x00000085, 0x052024e3},
1620 {0x00000087, 0x00000000},
1621 {0x00000088, 0x01000000},
1622 {0x0000008a, 0x1c0a0000},
1623 {0x0000008b, 0xff010000},
1624 {0x0000008d, 0xffffefff},
1625 {0x0000008e, 0xfff3efff},
1626 {0x0000008f, 0xfff3efbf},
1627 {0x00000092, 0xf7ffffff},
1628 {0x00000093, 0xffffff7f},
1629 {0x00000095, 0x00101101},
1630 {0x00000096, 0x00000fff},
1631 {0x00000097, 0x00116fff},
1632 {0x00000098, 0x60010000},
1633 {0x00000099, 0x10010000},
1634 {0x0000009a, 0x00006000},
1635 {0x0000009b, 0x00001000},
1636 {0x0000009f, 0x00b48000}
1639 #define HAWAII_IO_MC_REGS_SIZE 22
1641 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1643 {0x0000007d, 0x40000000},
1644 {0x0000007e, 0x40180304},
1645 {0x0000007f, 0x0000ff00},
1646 {0x00000081, 0x00000000},
1647 {0x00000083, 0x00000800},
1648 {0x00000086, 0x00000000},
1649 {0x00000087, 0x00000100},
1650 {0x00000088, 0x00020100},
1651 {0x00000089, 0x00000000},
1652 {0x0000008b, 0x00040000},
1653 {0x0000008c, 0x00000100},
1654 {0x0000008e, 0xff010000},
1655 {0x00000090, 0xffffefff},
1656 {0x00000091, 0xfff3efff},
1657 {0x00000092, 0xfff3efbf},
1658 {0x00000093, 0xf7ffffff},
1659 {0x00000094, 0xffffff7f},
1660 {0x00000095, 0x00000fff},
1661 {0x00000096, 0x00116fff},
1662 {0x00000097, 0x60010000},
1663 {0x00000098, 0x10010000},
1664 {0x0000009f, 0x00c79000}
1669 * cik_srbm_select - select specific register instances
1671 * @rdev: radeon_device pointer
1672 * @me: selected ME (micro engine)
1677 * Switches the currently active registers instances. Some
1678 * registers are instanced per VMID, others are instanced per
1679 * me/pipe/queue combination.
1681 static void cik_srbm_select(struct radeon_device *rdev,
1682 u32 me, u32 pipe, u32 queue, u32 vmid)
1684 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1687 QUEUEID(queue & 0x7));
1688 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1693 * ci_mc_load_microcode - load MC ucode into the hw
1695 * @rdev: radeon_device pointer
1697 * Load the GDDR MC ucode into the hw (CIK).
1698 * Returns 0 on success, error on failure.
1700 int ci_mc_load_microcode(struct radeon_device *rdev)
1702 const __be32 *fw_data;
1703 u32 running, blackout = 0;
/* NOTE(review): the declaration of io_mc_regs (assigned below) is not
 * visible in this extract -- confirm against the full file. */
1705 int i, ucode_size, regs_size;
/* Pick the per-family MC IO register table, ucode size and table size.
 * NOTE(review): case labels (presumably CHIP_BONAIRE / CHIP_HAWAII) and
 * the default/error path are not visible here -- confirm. */
1710 switch (rdev->family) {
1712 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1713 ucode_size = CIK_MC_UCODE_SIZE;
1714 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1717 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1718 ucode_size = HAWAII_MC_UCODE_SIZE;
1719 regs_size = HAWAII_IO_MC_REGS_SIZE;
/* Check whether the MC sequencer is currently running. */
1725 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
/* Engage MC blackout (bit 0) before reprogramming the sequencer;
 * the previous value is saved so it can be restored afterwards. */
1729 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1730 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1733 /* reset the engine and set to writable */
1734 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1735 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1737 /* load mc io regs */
1738 for (i = 0; i < regs_size; i++) {
1739 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1740 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1742 /* load the MC ucode */
1743 fw_data = (const __be32 *)rdev->mc_fw->data;
1744 for (i = 0; i < ucode_size; i++)
1745 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1747 /* put the engine back into the active state */
1748 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1749 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1750 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1752 /* wait for training to complete */
1753 for (i = 0; i < rdev->usec_timeout; i++) {
1754 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1758 for (i = 0; i < rdev->usec_timeout; i++) {
1759 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
/* Restore the saved blackout state once both memory channels report
 * training done. */
1765 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1772 * cik_init_microcode - load ucode images from disk
1774 * @rdev: radeon_device pointer
1776 * Use the firmware interface to load the ucode images into
1777 * the driver (not loaded into hw).
1778 * Returns 0 on success, error on failure.
1780 static int cik_init_microcode(struct radeon_device *rdev)
1782 const char *chip_name;
1783 size_t pfp_req_size, me_req_size, ce_req_size,
1784 mec_req_size, rlc_req_size, mc_req_size = 0,
1785 sdma_req_size, smc_req_size = 0;
/* Per-family firmware name prefix and expected sizes (in bytes) for
 * each ucode image; MC/SMC sizes stay 0 for APUs (no MC/SMC firmware).
 * NOTE(review): the case labels (BONAIRE/HAWAII/KAVERI/KABINI) are not
 * visible in this extract -- confirm against the full file. */
1791 switch (rdev->family) {
1793 chip_name = "BONAIRE";
1794 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795 me_req_size = CIK_ME_UCODE_SIZE * 4;
1796 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799 mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1804 chip_name = "HAWAII";
1805 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806 me_req_size = CIK_ME_UCODE_SIZE * 4;
1807 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1815 chip_name = "KAVERI";
1816 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817 me_req_size = CIK_ME_UCODE_SIZE * 4;
1818 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1824 chip_name = "KABINI";
1825 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826 me_req_size = CIK_ME_UCODE_SIZE * 4;
1827 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1835 DRM_INFO("Loading %s Microcode\n", chip_name);
/* Fetch each image via request_firmware() and validate its size
 * against the expected value computed above.
 * NOTE(review): the "if (err)" checks and "goto out"-style error
 * branches between these lines are not visible in this extract. */
1837 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1841 if (rdev->pfp_fw->size != pfp_req_size) {
1843 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844 rdev->pfp_fw->size, fw_name);
1849 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1853 if (rdev->me_fw->size != me_req_size) {
1855 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856 rdev->me_fw->size, fw_name);
1860 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1864 if (rdev->ce_fw->size != ce_req_size) {
1866 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867 rdev->ce_fw->size, fw_name);
1871 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1875 if (rdev->mec_fw->size != mec_req_size) {
1877 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878 rdev->mec_fw->size, fw_name);
1882 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1886 if (rdev->rlc_fw->size != rlc_req_size) {
1888 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889 rdev->rlc_fw->size, fw_name);
1893 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1897 if (rdev->sdma_fw->size != sdma_req_size) {
1899 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900 rdev->sdma_fw->size, fw_name);
1904 /* No SMC, MC ucode on APUs */
1905 if (!(rdev->flags & RADEON_IS_IGP)) {
1906 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1910 if (rdev->mc_fw->size != mc_req_size) {
1912 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913 rdev->mc_fw->size, fw_name);
/* SMC firmware is optional: a load failure only disables DPM (the
 * firmware reference is dropped and smc_fw left NULL). */
1917 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1921 "smc: error loading firmware \"%s\"\n",
1923 release_firmware(rdev->smc_fw);
1924 rdev->smc_fw = NULL;
1926 } else if (rdev->smc_fw->size != smc_req_size) {
1928 "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929 rdev->smc_fw->size, fw_name);
/* Error path: release every firmware image acquired so far and NULL
 * the pointers so later teardown cannot double-release them.
 * NOTE(review): the me_fw/ce_fw/mc_fw NULL assignments expected to
 * pair with these releases are not visible in this extract. */
1938 "cik_cp: Failed to load firmware \"%s\"\n",
1940 release_firmware(rdev->pfp_fw);
1941 rdev->pfp_fw = NULL;
1942 release_firmware(rdev->me_fw);
1944 release_firmware(rdev->ce_fw);
1946 release_firmware(rdev->rlc_fw);
1947 rdev->rlc_fw = NULL;
1948 release_firmware(rdev->mc_fw);
1950 release_firmware(rdev->smc_fw);
1951 rdev->smc_fw = NULL;
1960 * cik_tiling_mode_table_init - init the hw tiling table
1962 * @rdev: radeon_device pointer
1964 * Starting with SI, the tiling setup is done globally in a
1965 * set of 32 tiling modes. Rather than selecting each set of
1966 * parameters per surface as on older asics, we just select
1967 * which index in the tiling table we want to use, and the
1968 * surface uses those parameters (CIK).
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1972 const u32 num_tile_mode_states = 32;
1973 const u32 num_secondary_tile_mode_states = 16;
1974 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975 u32 num_pipe_configs;
1976 u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977 rdev->config.cik.max_shader_engines;
1979 switch (rdev->config.cik.mem_row_size_in_kb) {
1981 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1985 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1988 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1992 num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993 if (num_pipe_configs > 8)
1994 num_pipe_configs = 16;
1996 if (num_pipe_configs == 16) {
1997 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998 switch (reg_offset) {
2000 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2006 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2012 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2018 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2024 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027 TILE_SPLIT(split_equal_to_row_size));
2030 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2034 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2040 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043 TILE_SPLIT(split_equal_to_row_size));
2046 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2050 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2054 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2060 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2066 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2076 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2082 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2088 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2098 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2104 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2110 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2119 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2122 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123 switch (reg_offset) {
2125 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128 NUM_BANKS(ADDR_SURF_16_BANK));
2131 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134 NUM_BANKS(ADDR_SURF_16_BANK));
2137 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140 NUM_BANKS(ADDR_SURF_16_BANK));
2143 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146 NUM_BANKS(ADDR_SURF_16_BANK));
2149 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152 NUM_BANKS(ADDR_SURF_8_BANK));
2155 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158 NUM_BANKS(ADDR_SURF_4_BANK));
2161 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164 NUM_BANKS(ADDR_SURF_2_BANK));
2167 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170 NUM_BANKS(ADDR_SURF_16_BANK));
2173 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176 NUM_BANKS(ADDR_SURF_16_BANK));
2179 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182 NUM_BANKS(ADDR_SURF_16_BANK));
2185 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188 NUM_BANKS(ADDR_SURF_8_BANK));
2191 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194 NUM_BANKS(ADDR_SURF_4_BANK));
2197 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200 NUM_BANKS(ADDR_SURF_2_BANK));
2203 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206 NUM_BANKS(ADDR_SURF_2_BANK));
2212 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2214 } else if (num_pipe_configs == 8) {
2215 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216 switch (reg_offset) {
2218 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2224 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2230 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2236 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2242 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245 TILE_SPLIT(split_equal_to_row_size));
2248 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2258 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261 TILE_SPLIT(split_equal_to_row_size));
2264 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2268 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2272 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2284 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2290 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2294 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2300 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2306 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2312 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2316 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2328 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2337 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2340 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341 switch (reg_offset) {
2343 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346 NUM_BANKS(ADDR_SURF_16_BANK));
2349 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 NUM_BANKS(ADDR_SURF_16_BANK));
2355 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_16_BANK));
2361 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 NUM_BANKS(ADDR_SURF_16_BANK));
2367 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370 NUM_BANKS(ADDR_SURF_8_BANK));
2373 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376 NUM_BANKS(ADDR_SURF_4_BANK));
2379 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382 NUM_BANKS(ADDR_SURF_2_BANK));
2385 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388 NUM_BANKS(ADDR_SURF_16_BANK));
2391 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 NUM_BANKS(ADDR_SURF_16_BANK));
2397 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 NUM_BANKS(ADDR_SURF_16_BANK));
2403 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406 NUM_BANKS(ADDR_SURF_16_BANK));
2409 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412 NUM_BANKS(ADDR_SURF_8_BANK));
2415 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418 NUM_BANKS(ADDR_SURF_4_BANK));
2421 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424 NUM_BANKS(ADDR_SURF_2_BANK));
2430 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2433 } else if (num_pipe_configs == 4) {
2435 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436 switch (reg_offset) {
2438 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2444 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2450 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2456 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2462 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465 TILE_SPLIT(split_equal_to_row_size));
2468 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2472 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2478 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481 TILE_SPLIT(split_equal_to_row_size));
2484 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2488 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2492 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2504 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2514 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2520 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2526 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2532 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2536 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2542 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2560 } else if (num_rbs < 4) {
2561 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562 switch (reg_offset) {
2564 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2570 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2576 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2582 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2588 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591 TILE_SPLIT(split_equal_to_row_size));
2594 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2598 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2604 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607 TILE_SPLIT(split_equal_to_row_size));
2610 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2614 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2618 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2624 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2640 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2652 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2658 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2662 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2668 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2674 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2683 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2687 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688 switch (reg_offset) {
2690 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693 NUM_BANKS(ADDR_SURF_16_BANK));
2696 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699 NUM_BANKS(ADDR_SURF_16_BANK));
2702 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705 NUM_BANKS(ADDR_SURF_16_BANK));
2708 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711 NUM_BANKS(ADDR_SURF_16_BANK));
2714 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717 NUM_BANKS(ADDR_SURF_16_BANK));
2720 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723 NUM_BANKS(ADDR_SURF_8_BANK));
2726 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729 NUM_BANKS(ADDR_SURF_4_BANK));
2732 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 NUM_BANKS(ADDR_SURF_16_BANK));
2738 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741 NUM_BANKS(ADDR_SURF_16_BANK));
2744 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747 NUM_BANKS(ADDR_SURF_16_BANK));
2750 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753 NUM_BANKS(ADDR_SURF_16_BANK));
2756 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759 NUM_BANKS(ADDR_SURF_16_BANK));
2762 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765 NUM_BANKS(ADDR_SURF_8_BANK));
2768 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771 NUM_BANKS(ADDR_SURF_4_BANK));
2777 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2780 } else if (num_pipe_configs == 2) {
2781 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782 switch (reg_offset) {
2784 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 PIPE_CONFIG(ADDR_SURF_P2) |
2787 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2790 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792 PIPE_CONFIG(ADDR_SURF_P2) |
2793 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2796 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 PIPE_CONFIG(ADDR_SURF_P2) |
2799 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2802 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 PIPE_CONFIG(ADDR_SURF_P2) |
2805 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2808 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 PIPE_CONFIG(ADDR_SURF_P2) |
2811 TILE_SPLIT(split_equal_to_row_size));
2814 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2818 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820 PIPE_CONFIG(ADDR_SURF_P2) |
2821 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2824 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826 PIPE_CONFIG(ADDR_SURF_P2) |
2827 TILE_SPLIT(split_equal_to_row_size));
2830 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2833 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2837 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839 PIPE_CONFIG(ADDR_SURF_P2) |
2840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2843 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845 PIPE_CONFIG(ADDR_SURF_P2) |
2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851 PIPE_CONFIG(ADDR_SURF_P2) |
2852 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2855 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2859 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861 PIPE_CONFIG(ADDR_SURF_P2) |
2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2865 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867 PIPE_CONFIG(ADDR_SURF_P2) |
2868 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2871 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873 PIPE_CONFIG(ADDR_SURF_P2) |
2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2877 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2881 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883 PIPE_CONFIG(ADDR_SURF_P2) |
2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889 PIPE_CONFIG(ADDR_SURF_P2) |
2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895 PIPE_CONFIG(ADDR_SURF_P2) |
2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2902 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2905 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906 switch (reg_offset) {
2908 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911 NUM_BANKS(ADDR_SURF_16_BANK));
2914 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917 NUM_BANKS(ADDR_SURF_16_BANK));
2920 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 NUM_BANKS(ADDR_SURF_16_BANK));
2926 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 NUM_BANKS(ADDR_SURF_16_BANK));
2932 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935 NUM_BANKS(ADDR_SURF_16_BANK));
2938 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941 NUM_BANKS(ADDR_SURF_16_BANK));
2944 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947 NUM_BANKS(ADDR_SURF_8_BANK));
2950 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953 NUM_BANKS(ADDR_SURF_16_BANK));
2956 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 NUM_BANKS(ADDR_SURF_16_BANK));
2962 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965 NUM_BANKS(ADDR_SURF_16_BANK));
2968 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 NUM_BANKS(ADDR_SURF_16_BANK));
2974 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977 NUM_BANKS(ADDR_SURF_16_BANK));
2980 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 NUM_BANKS(ADDR_SURF_16_BANK));
2986 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989 NUM_BANKS(ADDR_SURF_8_BANK));
2995 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2999 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3003 * cik_select_se_sh - select which SE, SH to address
3005 * @rdev: radeon_device pointer
3006 * @se_num: shader engine to address
3007 * @sh_num: sh block to address
3009 * Select which SE, SH combinations to address. Certain
3010 * registers are instanced per SE or SH. 0xffffffff means
3011 * broadcast to all SEs or SHs (CIK).
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014 u32 se_num, u32 sh_num)
3016 u32 data = INSTANCE_BROADCAST_WRITES;
3018 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020 else if (se_num == 0xffffffff)
3021 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022 else if (sh_num == 0xffffffff)
3023 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3025 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026 WREG32(GRBM_GFX_INDEX, data);
3030 * cik_create_bitmask - create a bitmask
3032 * @bit_width: length of the mask
3034 * create a variable length bit mask (CIK).
3035 * Returns the bitmask.
static u32 cik_create_bitmask(u32 bit_width)
{
	/*
	 * Build a mask with the low bit_width bits set.  Shifting one
	 * bit in per iteration (rather than computing (1 << bit_width) - 1)
	 * stays well-defined even when bit_width equals the type width.
	 */
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
3049 * cik_get_rb_disabled - compute the bitmask of disabled render backends (RBs)
3051 * @rdev: radeon_device pointer
3052 * @max_rb_num_per_se: max RBs (render backends) per shader engine for the asic
3053 * @se_num: number of SEs (shader engines) for the asic
3054 * @sh_per_se: number of SH blocks per SE for the asic
3056 * Calculates the bitmask of disabled RBs (CIK).
3057 * Returns the disabled RB bitmask.
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060 u32 max_rb_num_per_se,
3065 data = RREG32(CC_RB_BACKEND_DISABLE);
3067 data &= BACKEND_DISABLE_MASK;
3070 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3072 data >>= BACKEND_DISABLE_SHIFT;
3074 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3080 * cik_setup_rb - setup the RBs on the asic
3082 * @rdev: radeon_device pointer
3083 * @se_num: number of SEs (shader engines) for the asic
3084 * @sh_per_se: number of SH blocks per SE for the asic
3085 * @max_rb_num_per_se: max RBs (render backends) per shader engine for the asic
3087 * Configures per-SE/SH RB registers (CIK).
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090 u32 se_num, u32 sh_per_se,
3091 u32 max_rb_num_per_se)
3095 u32 disabled_rbs = 0;
3096 u32 enabled_rbs = 0;
3098 for (i = 0; i < se_num; i++) {
3099 for (j = 0; j < sh_per_se; j++) {
3100 cik_select_se_sh(rdev, i, j);
3101 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3102 if (rdev->family == CHIP_HAWAII)
3103 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3105 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3108 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3111 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3112 if (!(disabled_rbs & mask))
3113 enabled_rbs |= mask;
3117 rdev->config.cik.backend_enable_mask = enabled_rbs;
3119 for (i = 0; i < se_num; i++) {
3120 cik_select_se_sh(rdev, i, 0xffffffff);
3122 for (j = 0; j < sh_per_se; j++) {
3123 switch (enabled_rbs & 3) {
3126 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3128 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3131 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3134 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3138 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3143 WREG32(PA_SC_RASTER_CONFIG, data);
3145 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3149 * cik_gpu_init - setup the 3D engine
3151 * @rdev: radeon_device pointer
3153 * Configures the 3D engine and tiling configuration
3154 * registers so that the 3D engine is usable.
3156 static void cik_gpu_init(struct radeon_device *rdev)
/* NOTE(review): this chunk is an extraction with original line numbers
 * embedded and structural lines (braces, case labels, blanks) dropped.
 * Comments below only annotate; all code bytes are left untouched.
 * Confirm section boundaries against the pristine cik.c. */
3158 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3159 u32 mc_shared_chmap, mc_arb_ramcfg;
3160 u32 hdp_host_path_cntl;
/* Per-family hard-coded topology limits (SEs, pipes, CUs, backends,
 * caches, FIFO sizes) plus the "golden" GB_ADDR_CONFIG value.
 * Case labels were dropped by the extraction; chips below are inferred
 * from the *_GB_ADDR_CONFIG_GOLDEN constants — TODO confirm. */
3164 switch (rdev->family) {
/* presumably CHIP_BONAIRE: 2 SEs, 4 tile pipes */
3166 rdev->config.cik.max_shader_engines = 2;
3167 rdev->config.cik.max_tile_pipes = 4;
3168 rdev->config.cik.max_cu_per_sh = 7;
3169 rdev->config.cik.max_sh_per_se = 1;
3170 rdev->config.cik.max_backends_per_se = 2;
3171 rdev->config.cik.max_texture_channel_caches = 4;
3172 rdev->config.cik.max_gprs = 256;
3173 rdev->config.cik.max_gs_threads = 32;
3174 rdev->config.cik.max_hw_contexts = 8;
3176 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3177 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3178 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3179 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3180 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
/* presumably CHIP_HAWAII: 4 SEs, 16 tile pipes */
3183 rdev->config.cik.max_shader_engines = 4;
3184 rdev->config.cik.max_tile_pipes = 16;
3185 rdev->config.cik.max_cu_per_sh = 11;
3186 rdev->config.cik.max_sh_per_se = 1;
3187 rdev->config.cik.max_backends_per_se = 4;
3188 rdev->config.cik.max_texture_channel_caches = 16;
3189 rdev->config.cik.max_gprs = 256;
3190 rdev->config.cik.max_gs_threads = 32;
3191 rdev->config.cik.max_hw_contexts = 8;
3193 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3194 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3195 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3196 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3197 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
/* presumably CHIP_KAVERI (APU): CU/backend counts vary by PCI
 * device id (different SKU harvesting) */
3200 rdev->config.cik.max_shader_engines = 1;
3201 rdev->config.cik.max_tile_pipes = 4;
3202 if ((rdev->pdev->device == 0x1304) ||
3203 (rdev->pdev->device == 0x1305) ||
3204 (rdev->pdev->device == 0x130C) ||
3205 (rdev->pdev->device == 0x130F) ||
3206 (rdev->pdev->device == 0x1310) ||
3207 (rdev->pdev->device == 0x1311) ||
3208 (rdev->pdev->device == 0x131C)) {
3209 rdev->config.cik.max_cu_per_sh = 8;
3210 rdev->config.cik.max_backends_per_se = 2;
3211 } else if ((rdev->pdev->device == 0x1309) ||
3212 (rdev->pdev->device == 0x130A) ||
3213 (rdev->pdev->device == 0x130D) ||
3214 (rdev->pdev->device == 0x1313) ||
3215 (rdev->pdev->device == 0x131D)) {
3216 rdev->config.cik.max_cu_per_sh = 6;
3217 rdev->config.cik.max_backends_per_se = 2;
3218 } else if ((rdev->pdev->device == 0x1306) ||
3219 (rdev->pdev->device == 0x1307) ||
3220 (rdev->pdev->device == 0x130B) ||
3221 (rdev->pdev->device == 0x130E) ||
3222 (rdev->pdev->device == 0x1315) ||
3223 (rdev->pdev->device == 0x131B)) {
3224 rdev->config.cik.max_cu_per_sh = 4;
3225 rdev->config.cik.max_backends_per_se = 1;
/* fallback SKU: smallest CU count */
3227 rdev->config.cik.max_cu_per_sh = 3;
3228 rdev->config.cik.max_backends_per_se = 1;
3230 rdev->config.cik.max_sh_per_se = 1;
3231 rdev->config.cik.max_texture_channel_caches = 4;
3232 rdev->config.cik.max_gprs = 256;
3233 rdev->config.cik.max_gs_threads = 16;
3234 rdev->config.cik.max_hw_contexts = 8;
3236 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
/* presumably CHIP_KABINI/MULLINS: smallest config, 2 tile pipes */
3244 rdev->config.cik.max_shader_engines = 1;
3245 rdev->config.cik.max_tile_pipes = 2;
3246 rdev->config.cik.max_cu_per_sh = 2;
3247 rdev->config.cik.max_sh_per_se = 1;
3248 rdev->config.cik.max_backends_per_se = 1;
3249 rdev->config.cik.max_texture_channel_caches = 2;
3250 rdev->config.cik.max_gprs = 256;
3251 rdev->config.cik.max_gs_threads = 16;
3252 rdev->config.cik.max_hw_contexts = 8;
3254 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3255 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3256 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3257 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3258 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
/* zero 32 sets of HDP registers, 0x18 bytes apart */
3262 /* Initialize HDP */
3263 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3264 WREG32((0x2c14 + j), 0x00000000);
3265 WREG32((0x2c18 + j), 0x00000000);
3266 WREG32((0x2c1c + j), 0x00000000);
3267 WREG32((0x2c20 + j), 0x00000000);
3268 WREG32((0x2c24 + j), 0x00000000);
/* GRBM read timeout and FB read/write access enable */
3271 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3273 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3275 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3276 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
/* derive memory layout parameters from the MC arbiter config */
3278 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3279 rdev->config.cik.mem_max_burst_length_bytes = 256;
3280 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3281 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
/* row size is clamped to 4KB */
3282 if (rdev->config.cik.mem_row_size_in_kb > 4)
3283 rdev->config.cik.mem_row_size_in_kb = 4;
3284 /* XXX use MC settings? */
3285 rdev->config.cik.shader_engine_tile_size = 32;
3286 rdev->config.cik.num_gpus = 1;
3287 rdev->config.cik.multi_gpu_tile_size = 64;
/* encode the clamped row size back into GB_ADDR_CONFIG
 * (case labels 1/2/4 dropped by the extraction) */
3289 /* fix up row size */
3290 gb_addr_config &= ~ROW_SIZE_MASK;
3291 switch (rdev->config.cik.mem_row_size_in_kb) {
3294 gb_addr_config |= ROW_SIZE(0);
3297 gb_addr_config |= ROW_SIZE(1);
3300 gb_addr_config |= ROW_SIZE(2);
3304 /* setup tiling info dword. gb_addr_config is not adequate since it does
3305 * not have bank info, so create a custom tiling dword.
3306 * bits 3:0 num_pipes
3307 * bits 7:4 num_banks
3308 * bits 11:8 group_size
3309 * bits 15:12 row_size
3311 rdev->config.cik.tile_config = 0;
3312 switch (rdev->config.cik.num_tile_pipes) {
3314 rdev->config.cik.tile_config |= (0 << 0);
3317 rdev->config.cik.tile_config |= (1 << 0);
3320 rdev->config.cik.tile_config |= (2 << 0);
3324 /* XXX what about 12? */
3325 rdev->config.cik.tile_config |= (3 << 0);
3328 rdev->config.cik.tile_config |=
3329 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3330 rdev->config.cik.tile_config |=
3331 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3332 rdev->config.cik.tile_config |=
3333 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
/* broadcast the final address config to every block that needs it
 * (HDP, display DMIF, both SDMA engines, UVD) */
3335 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3336 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3337 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3338 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3339 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3340 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3341 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3342 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
/* program the tiling tables and render-backend mapping */
3344 cik_tiling_mode_table_init(rdev);
3346 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3347 rdev->config.cik.max_sh_per_se,
3348 rdev->config.cik.max_backends_per_se);
3350 /* set HW defaults for 3D engine */
3351 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3353 WREG32(SX_DEBUG_1, 0x20);
3355 WREG32(TA_CNTL_AUX, 0x00010000);
/* NOTE(review): the tmp |= <flags> lines between these RMW pairs were
 * dropped by the extraction — the writes below currently modify
 * nothing; restore from pristine cik.c */
3357 tmp = RREG32(SPI_CONFIG_CNTL);
3359 WREG32(SPI_CONFIG_CNTL, tmp);
3361 WREG32(SQ_CONFIG, 1);
3363 WREG32(DB_DEBUG, 0);
3365 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3367 WREG32(DB_DEBUG2, tmp);
3369 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3371 WREG32(DB_DEBUG3, tmp);
3373 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3375 WREG32(CB_HW_CONTROL, tmp);
3377 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
/* scan-converter FIFO sizes from the per-family table above */
3379 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3380 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3381 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3382 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3384 WREG32(VGT_NUM_INSTANCES, 1);
3386 WREG32(CP_PERFMON_CNTL, 0);
3388 WREG32(SQ_CONFIG, 0);
3390 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3391 FORCE_EOV_MAX_REZ_CNT(255)));
3393 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3394 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3396 WREG32(VGT_GS_VERTEX_REUSE, 16);
3397 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
/* make HDP flushes also invalidate its cache */
3399 tmp = RREG32(HDP_MISC_CNTL);
3400 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3401 WREG32(HDP_MISC_CNTL, tmp);
/* read-modify-write keeps the BIOS/defaults value */
3403 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3404 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3406 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3407 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3413 * GPU scratch register helper functions.
3416 * cik_scratch_init - setup driver info for CP scratch regs
3418 * @rdev: radeon_device pointer
3420 * Set up the number and offset of the CP scratch registers.
3421 * NOTE: use of CP scratch registers is a legacy interface and
3422 * is not used by default on newer asics (r6xx+). On newer asics,
3423 * memory buffers are used for fences rather than scratch regs.
3425 static void cik_scratch_init(struct radeon_device *rdev)
3429 rdev->scratch.num_reg = 7;
3430 rdev->scratch.reg_base = SCRATCH_REG0;
3431 for (i = 0; i < rdev->scratch.num_reg; i++) {
3432 rdev->scratch.free[i] = true;
3433 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3438 * cik_ring_test - basic gfx ring test
3440 * @rdev: radeon_device pointer
3441 * @ring: radeon_ring structure holding ring information
3443 * Allocate a scratch register and write to it using the gfx ring (CIK).
3444 * Provides a basic gfx ring test to verify that the ring is working.
3445 * Used by cik_cp_gfx_resume();
3446 * Returns 0 on success, error on failure.
3448 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3455 r = radeon_scratch_get(rdev, &scratch);
3457 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3460 WREG32(scratch, 0xCAFEDEAD);
3461 r = radeon_ring_lock(rdev, ring, 3);
3463 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3464 radeon_scratch_free(rdev, scratch);
3467 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3468 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3469 radeon_ring_write(ring, 0xDEADBEEF);
3470 radeon_ring_unlock_commit(rdev, ring);
3472 for (i = 0; i < rdev->usec_timeout; i++) {
3473 tmp = RREG32(scratch);
3474 if (tmp == 0xDEADBEEF)
3478 if (i < rdev->usec_timeout) {
3479 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3481 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3482 ring->idx, scratch, tmp);
3485 radeon_scratch_free(rdev, scratch);
3490 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3492 * @rdev: radeon_device pointer
3493 * @ridx: radeon ring index
3495 * Emits an hdp flush on the cp.
3497 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3500 struct radeon_ring *ring = &rdev->ring[ridx];
3503 switch (ring->idx) {
3504 case CAYMAN_RING_TYPE_CP1_INDEX:
3505 case CAYMAN_RING_TYPE_CP2_INDEX:
3509 ref_and_mask = CP2 << ring->pipe;
3512 ref_and_mask = CP6 << ring->pipe;
3518 case RADEON_RING_TYPE_GFX_INDEX:
3523 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3524 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3525 WAIT_REG_MEM_FUNCTION(3) | /* == */
3526 WAIT_REG_MEM_ENGINE(1))); /* pfp */
3527 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3528 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3529 radeon_ring_write(ring, ref_and_mask);
3530 radeon_ring_write(ring, ref_and_mask);
3531 radeon_ring_write(ring, 0x20); /* poll interval */
3535 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3537 * @rdev: radeon_device pointer
3538 * @fence: radeon fence object
3540 * Emits a fence sequence number on the gfx ring and flushes
3543 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3544 struct radeon_fence *fence)
3546 struct radeon_ring *ring = &rdev->ring[fence->ring];
3547 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3549 /* EVENT_WRITE_EOP - flush caches, send int */
3550 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3551 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3553 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3555 radeon_ring_write(ring, addr & 0xfffffffc);
3556 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3557 radeon_ring_write(ring, fence->seq);
3558 radeon_ring_write(ring, 0);
3560 cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3564 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3566 * @rdev: radeon_device pointer
3567 * @fence: radeon fence object
3569 * Emits a fence sequence number on the compute ring and flushes
3572 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3573 struct radeon_fence *fence)
3575 struct radeon_ring *ring = &rdev->ring[fence->ring];
3576 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3578 /* RELEASE_MEM - flush caches, send int */
3579 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3580 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3582 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3584 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3585 radeon_ring_write(ring, addr & 0xfffffffc);
3586 radeon_ring_write(ring, upper_32_bits(addr));
3587 radeon_ring_write(ring, fence->seq);
3588 radeon_ring_write(ring, 0);
3590 cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3593 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3594 struct radeon_ring *ring,
3595 struct radeon_semaphore *semaphore,
3598 uint64_t addr = semaphore->gpu_addr;
3599 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3601 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3602 radeon_ring_write(ring, addr & 0xffffffff);
3603 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3609 * cik_copy_cpdma - copy pages using the CP DMA engine
3611 * @rdev: radeon_device pointer
3612 * @src_offset: src GPU address
3613 * @dst_offset: dst GPU address
3614 * @num_gpu_pages: number of GPU pages to xfer
3615 * @fence: radeon fence object
3617 * Copy GPU paging using the CP DMA engine (CIK+).
3618 * Used by the radeon ttm implementation to move pages if
3619 * registered as the asic copy callback.
3621 int cik_copy_cpdma(struct radeon_device *rdev,
3622 uint64_t src_offset, uint64_t dst_offset,
3623 unsigned num_gpu_pages,
3624 struct radeon_fence **fence)
3626 struct radeon_semaphore *sem = NULL;
3627 int ring_index = rdev->asic->copy.blit_ring_index;
3628 struct radeon_ring *ring = &rdev->ring[ring_index];
3629 u32 size_in_bytes, cur_size_in_bytes, control;
3633 r = radeon_semaphore_create(rdev, &sem);
3635 DRM_ERROR("radeon: moving bo (%d).\n", r);
3639 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3640 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3641 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3643 DRM_ERROR("radeon: moving bo (%d).\n", r);
3644 radeon_semaphore_free(rdev, &sem, NULL);
3648 radeon_semaphore_sync_to(sem, *fence);
3649 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3651 for (i = 0; i < num_loops; i++) {
3652 cur_size_in_bytes = size_in_bytes;
3653 if (cur_size_in_bytes > 0x1fffff)
3654 cur_size_in_bytes = 0x1fffff;
3655 size_in_bytes -= cur_size_in_bytes;
3657 if (size_in_bytes == 0)
3658 control |= PACKET3_DMA_DATA_CP_SYNC;
3659 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3660 radeon_ring_write(ring, control);
3661 radeon_ring_write(ring, lower_32_bits(src_offset));
3662 radeon_ring_write(ring, upper_32_bits(src_offset));
3663 radeon_ring_write(ring, lower_32_bits(dst_offset));
3664 radeon_ring_write(ring, upper_32_bits(dst_offset));
3665 radeon_ring_write(ring, cur_size_in_bytes);
3666 src_offset += cur_size_in_bytes;
3667 dst_offset += cur_size_in_bytes;
3670 r = radeon_fence_emit(rdev, fence, ring->idx);
3672 radeon_ring_unlock_undo(rdev, ring);
3676 radeon_ring_unlock_commit(rdev, ring);
3677 radeon_semaphore_free(rdev, &sem, *fence);
3686 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3688 * @rdev: radeon_device pointer
3689 * @ib: radeon indirect buffer object
3691 * Emits an DE (drawing engine) or CE (constant engine) IB
3692 * on the gfx ring. IBs are usually generated by userspace
3693 * acceleration drivers and submitted to the kernel for
3694 * scheduling on the ring. This function schedules the IB
3695 * on the gfx ring for execution by the GPU.
3697 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3699 struct radeon_ring *ring = &rdev->ring[ib->ring];
3700 u32 header, control = INDIRECT_BUFFER_VALID;
3702 if (ib->is_const_ib) {
3703 /* set switch buffer packet before const IB */
3704 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3705 radeon_ring_write(ring, 0);
3707 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3710 if (ring->rptr_save_reg) {
3711 next_rptr = ring->wptr + 3 + 4;
3712 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3713 radeon_ring_write(ring, ((ring->rptr_save_reg -
3714 PACKET3_SET_UCONFIG_REG_START) >> 2));
3715 radeon_ring_write(ring, next_rptr);
3716 } else if (rdev->wb.enabled) {
3717 next_rptr = ring->wptr + 5 + 4;
3718 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3719 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3720 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3721 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3722 radeon_ring_write(ring, next_rptr);
3725 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3728 control |= ib->length_dw |
3729 (ib->vm ? (ib->vm->id << 24) : 0);
3731 radeon_ring_write(ring, header);
3732 radeon_ring_write(ring,
3736 (ib->gpu_addr & 0xFFFFFFFC));
3737 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3738 radeon_ring_write(ring, control);
3742 * cik_ib_test - basic gfx ring IB test
3744 * @rdev: radeon_device pointer
3745 * @ring: radeon_ring structure holding ring information
3747 * Allocate an IB and execute it on the gfx ring (CIK).
3748 * Provides a basic gfx ring test to verify that IBs are working.
3749 * Returns 0 on success, error on failure.
3751 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3753 struct radeon_ib ib;
3759 r = radeon_scratch_get(rdev, &scratch);
3761 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3764 WREG32(scratch, 0xCAFEDEAD);
3765 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3767 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3768 radeon_scratch_free(rdev, scratch);
3771 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3772 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3773 ib.ptr[2] = 0xDEADBEEF;
3775 r = radeon_ib_schedule(rdev, &ib, NULL);
3777 radeon_scratch_free(rdev, scratch);
3778 radeon_ib_free(rdev, &ib);
3779 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3782 r = radeon_fence_wait(ib.fence, false);
3784 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3785 radeon_scratch_free(rdev, scratch);
3786 radeon_ib_free(rdev, &ib);
3789 for (i = 0; i < rdev->usec_timeout; i++) {
3790 tmp = RREG32(scratch);
3791 if (tmp == 0xDEADBEEF)
3795 if (i < rdev->usec_timeout) {
3796 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3798 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3802 radeon_scratch_free(rdev, scratch);
3803 radeon_ib_free(rdev, &ib);
3809 * On CIK, gfx and compute now have independent command processors.
3812 * Gfx consists of a single ring and can process both gfx jobs and
3813 * compute jobs. The gfx CP consists of three microengines (ME):
3814 * PFP - Pre-Fetch Parser
3816 * CE - Constant Engine
3817 * The PFP and ME make up what is considered the Drawing Engine (DE).
3818 * The CE is an asynchronous engine used for updating buffer descriptors
3819 * used by the DE so that they can be loaded into cache in parallel
3820 * while the DE is processing state update packets.
3823 * The compute CP consists of two microengines (ME):
3824 * MEC1 - Compute MicroEngine 1
3825 * MEC2 - Compute MicroEngine 2
3826 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3827 * The queues are exposed to userspace and are programmed directly
3828 * by the compute runtime.
3831 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3833 * @rdev: radeon_device pointer
3834 * @enable: enable or disable the MEs
3836 * Halts or unhalts the gfx MEs.
3838 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3841 WREG32(CP_ME_CNTL, 0);
3843 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3844 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3850 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3852 * @rdev: radeon_device pointer
3854 * Loads the gfx PFP, ME, and CE ucode.
3855 * Returns 0 for success, -EINVAL if the ucode is not available.
3857 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3859 const __be32 *fw_data;
3862 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3865 cik_cp_gfx_enable(rdev, false);
3868 fw_data = (const __be32 *)rdev->pfp_fw->data;
3869 WREG32(CP_PFP_UCODE_ADDR, 0);
3870 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3871 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3872 WREG32(CP_PFP_UCODE_ADDR, 0);
3875 fw_data = (const __be32 *)rdev->ce_fw->data;
3876 WREG32(CP_CE_UCODE_ADDR, 0);
3877 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3878 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3879 WREG32(CP_CE_UCODE_ADDR, 0);
3882 fw_data = (const __be32 *)rdev->me_fw->data;
3883 WREG32(CP_ME_RAM_WADDR, 0);
3884 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3885 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3886 WREG32(CP_ME_RAM_WADDR, 0);
3888 WREG32(CP_PFP_UCODE_ADDR, 0);
3889 WREG32(CP_CE_UCODE_ADDR, 0);
3890 WREG32(CP_ME_RAM_WADDR, 0);
3891 WREG32(CP_ME_RAM_RADDR, 0);
3896 * cik_cp_gfx_start - start the gfx ring
3898 * @rdev: radeon_device pointer
3900 * Enables the ring and loads the clear state context and other
3901 * packets required to init the ring.
3902 * Returns 0 for success, error for failure.
3904 static int cik_cp_gfx_start(struct radeon_device *rdev)
3906 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3910 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3911 WREG32(CP_ENDIAN_SWAP, 0);
3912 WREG32(CP_DEVICE_ID, 1);
3914 cik_cp_gfx_enable(rdev, true);
3916 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3918 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3922 /* init the CE partitions. CE only used for gfx on CIK */
3923 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3924 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3925 radeon_ring_write(ring, 0xc000);
3926 radeon_ring_write(ring, 0xc000);
3928 /* setup clear context state */
3929 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3930 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3932 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3933 radeon_ring_write(ring, 0x80000000);
3934 radeon_ring_write(ring, 0x80000000);
3936 for (i = 0; i < cik_default_size; i++)
3937 radeon_ring_write(ring, cik_default_state[i]);
3939 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3940 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3942 /* set clear context state */
3943 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3944 radeon_ring_write(ring, 0);
3946 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3947 radeon_ring_write(ring, 0x00000316);
3948 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3949 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3951 radeon_ring_unlock_commit(rdev, ring);
3957 * cik_cp_gfx_fini - stop the gfx ring
3959 * @rdev: radeon_device pointer
3961 * Stop the gfx ring and tear down the driver ring
3964 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3966 cik_cp_gfx_enable(rdev, false);
3967 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3971 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3973 * @rdev: radeon_device pointer
3975 * Program the location and size of the gfx ring buffer
3976 * and test it to make sure it's working.
3977 * Returns 0 for success, error for failure.
3979 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3981 struct radeon_ring *ring;
3987 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3988 if (rdev->family != CHIP_HAWAII)
3989 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3991 /* Set the write pointer delay */
3992 WREG32(CP_RB_WPTR_DELAY, 0);
3994 /* set the RB to use vmid 0 */
3995 WREG32(CP_RB_VMID, 0);
3997 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3999 /* ring 0 - compute and gfx */
4000 /* Set ring buffer size */
4001 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4002 rb_bufsz = order_base_2(ring->ring_size / 8);
4003 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4005 tmp |= BUF_SWAP_32BIT;
4007 WREG32(CP_RB0_CNTL, tmp);
4009 /* Initialize the ring buffer's read and write pointers */
4010 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4012 WREG32(CP_RB0_WPTR, ring->wptr);
4014 /* set the wb address wether it's enabled or not */
4015 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4016 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4018 /* scratch register shadowing is no longer supported */
4019 WREG32(SCRATCH_UMSK, 0);
4021 if (!rdev->wb.enabled)
4022 tmp |= RB_NO_UPDATE;
4025 WREG32(CP_RB0_CNTL, tmp);
4027 rb_addr = ring->gpu_addr >> 8;
4028 WREG32(CP_RB0_BASE, rb_addr);
4029 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4031 ring->rptr = RREG32(CP_RB0_RPTR);
4033 /* start the ring */
4034 cik_cp_gfx_start(rdev);
4035 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4036 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4038 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4044 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4045 struct radeon_ring *ring)
4049 if (rdev->wb.enabled)
4050 rptr = rdev->wb.wb[ring->rptr_offs/4];
4052 rptr = RREG32(CP_RB0_RPTR);
4057 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4058 struct radeon_ring *ring)
4062 wptr = RREG32(CP_RB0_WPTR);
4067 void cik_gfx_set_wptr(struct radeon_device *rdev,
4068 struct radeon_ring *ring)
4070 WREG32(CP_RB0_WPTR, ring->wptr);
4071 (void)RREG32(CP_RB0_WPTR);
4074 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4075 struct radeon_ring *ring)
4079 if (rdev->wb.enabled) {
4080 rptr = rdev->wb.wb[ring->rptr_offs/4];
4082 mutex_lock(&rdev->srbm_mutex);
4083 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4084 rptr = RREG32(CP_HQD_PQ_RPTR);
4085 cik_srbm_select(rdev, 0, 0, 0, 0);
4086 mutex_unlock(&rdev->srbm_mutex);
4092 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4093 struct radeon_ring *ring)
4097 if (rdev->wb.enabled) {
4098 /* XXX check if swapping is necessary on BE */
4099 wptr = rdev->wb.wb[ring->wptr_offs/4];
4101 mutex_lock(&rdev->srbm_mutex);
4102 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4103 wptr = RREG32(CP_HQD_PQ_WPTR);
4104 cik_srbm_select(rdev, 0, 0, 0, 0);
4105 mutex_unlock(&rdev->srbm_mutex);
4111 void cik_compute_set_wptr(struct radeon_device *rdev,
4112 struct radeon_ring *ring)
4114 /* XXX check if swapping is necessary on BE */
4115 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4116 WDOORBELL32(ring->doorbell_index, ring->wptr);
4120 * cik_cp_compute_enable - enable/disable the compute CP MEs
4122 * @rdev: radeon_device pointer
4123 * @enable: enable or disable the MEs
4125 * Halts or unhalts the compute MEs.
4127 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4130 WREG32(CP_MEC_CNTL, 0);
4132 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4137 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4139 * @rdev: radeon_device pointer
4141 * Loads the compute MEC1&2 ucode.
4142 * Returns 0 for success, -EINVAL if the ucode is not available.
4144 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4146 const __be32 *fw_data;
4152 cik_cp_compute_enable(rdev, false);
4155 fw_data = (const __be32 *)rdev->mec_fw->data;
4156 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4157 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4158 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4159 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4161 if (rdev->family == CHIP_KAVERI) {
4163 fw_data = (const __be32 *)rdev->mec_fw->data;
4164 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4165 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4166 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4167 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4174 * cik_cp_compute_start - start the compute queues
4176 * @rdev: radeon_device pointer
4178 * Enable the compute queues.
4179 * Returns 0 for success, error for failure.
4181 static int cik_cp_compute_start(struct radeon_device *rdev)
4183 cik_cp_compute_enable(rdev, true);
4189 * cik_cp_compute_fini - stop the compute queues
4191 * @rdev: radeon_device pointer
4193 * Stop the compute queues and tear down the driver queue
4196 static void cik_cp_compute_fini(struct radeon_device *rdev)
4200 cik_cp_compute_enable(rdev, false);
4202 for (i = 0; i < 2; i++) {
4204 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4206 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4208 if (rdev->ring[idx].mqd_obj) {
4209 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4210 if (unlikely(r != 0))
4211 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4213 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4214 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4216 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4217 rdev->ring[idx].mqd_obj = NULL;
4222 static void cik_mec_fini(struct radeon_device *rdev)
4226 if (rdev->mec.hpd_eop_obj) {
4227 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4228 if (unlikely(r != 0))
4229 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4230 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4231 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4233 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4234 rdev->mec.hpd_eop_obj = NULL;
4238 #define MEC_HPD_SIZE 2048
4240 static int cik_mec_init(struct radeon_device *rdev)
4246 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4247 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4249 if (rdev->family == CHIP_KAVERI)
4250 rdev->mec.num_mec = 2;
4252 rdev->mec.num_mec = 1;
4253 rdev->mec.num_pipe = 4;
4254 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4256 if (rdev->mec.hpd_eop_obj == NULL) {
4257 r = radeon_bo_create(rdev,
4258 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4260 RADEON_GEM_DOMAIN_GTT, NULL,
4261 &rdev->mec.hpd_eop_obj);
4263 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4268 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4269 if (unlikely(r != 0)) {
4273 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4274 &rdev->mec.hpd_eop_gpu_addr);
4276 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4280 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4282 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4287 /* clear memory. Not sure if this is required or not */
4288 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4290 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4291 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4296 struct hqd_registers
4298 u32 cp_mqd_base_addr;
4299 u32 cp_mqd_base_addr_hi;
4302 u32 cp_hqd_persistent_state;
4303 u32 cp_hqd_pipe_priority;
4304 u32 cp_hqd_queue_priority;
4307 u32 cp_hqd_pq_base_hi;
4309 u32 cp_hqd_pq_rptr_report_addr;
4310 u32 cp_hqd_pq_rptr_report_addr_hi;
4311 u32 cp_hqd_pq_wptr_poll_addr;
4312 u32 cp_hqd_pq_wptr_poll_addr_hi;
4313 u32 cp_hqd_pq_doorbell_control;
4315 u32 cp_hqd_pq_control;
4316 u32 cp_hqd_ib_base_addr;
4317 u32 cp_hqd_ib_base_addr_hi;
4319 u32 cp_hqd_ib_control;
4320 u32 cp_hqd_iq_timer;
4322 u32 cp_hqd_dequeue_request;
4323 u32 cp_hqd_dma_offload;
4324 u32 cp_hqd_sema_cmd;
4325 u32 cp_hqd_msg_type;
4326 u32 cp_hqd_atomic0_preop_lo;
4327 u32 cp_hqd_atomic0_preop_hi;
4328 u32 cp_hqd_atomic1_preop_lo;
4329 u32 cp_hqd_atomic1_preop_hi;
4330 u32 cp_hqd_hq_scheduler0;
4331 u32 cp_hqd_hq_scheduler1;
4338 u32 dispatch_initiator;
4342 u32 pipeline_stat_enable;
4343 u32 perf_counter_enable;
4349 u32 resource_limits;
4350 u32 static_thread_mgmt01[2];
4352 u32 static_thread_mgmt23[2];
4354 u32 thread_trace_enable;
4357 u32 vgtcs_invoke_count[2];
4358 struct hqd_registers queue_state;
4360 u32 interrupt_queue[64];
4364 * cik_cp_compute_resume - setup the compute queue registers
4366 * @rdev: radeon_device pointer
4368 * Program the compute queues and test them to make sure they
4370 * Returns 0 for success, error for failure.
4372 static int cik_cp_compute_resume(struct radeon_device *rdev)
4376 bool use_doorbell = true;
4382 struct bonaire_mqd *mqd;
4384 r = cik_cp_compute_start(rdev);
4388 /* fix up chicken bits */
4389 tmp = RREG32(CP_CPF_DEBUG);
4391 WREG32(CP_CPF_DEBUG, tmp);
4393 /* init the pipes */
4394 mutex_lock(&rdev->srbm_mutex);
4395 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4396 int me = (i < 4) ? 1 : 2;
4397 int pipe = (i < 4) ? i : (i - 4);
4399 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4401 cik_srbm_select(rdev, me, pipe, 0, 0);
4403 /* write the EOP addr */
4404 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4405 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4407 /* set the VMID assigned */
4408 WREG32(CP_HPD_EOP_VMID, 0);
4410 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4411 tmp = RREG32(CP_HPD_EOP_CONTROL);
4412 tmp &= ~EOP_SIZE_MASK;
4413 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4414 WREG32(CP_HPD_EOP_CONTROL, tmp);
4416 cik_srbm_select(rdev, 0, 0, 0, 0);
4417 mutex_unlock(&rdev->srbm_mutex);
4419 /* init the queues. Just two for now. */
4420 for (i = 0; i < 2; i++) {
4422 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4424 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4426 if (rdev->ring[idx].mqd_obj == NULL) {
4427 r = radeon_bo_create(rdev,
4428 sizeof(struct bonaire_mqd),
4430 RADEON_GEM_DOMAIN_GTT, NULL,
4431 &rdev->ring[idx].mqd_obj);
4433 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4438 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4439 if (unlikely(r != 0)) {
4440 cik_cp_compute_fini(rdev);
4443 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4446 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4447 cik_cp_compute_fini(rdev);
4450 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4452 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4453 cik_cp_compute_fini(rdev);
4457 /* init the mqd struct */
4458 memset(buf, 0, sizeof(struct bonaire_mqd));
4460 mqd = (struct bonaire_mqd *)buf;
4461 mqd->header = 0xC0310800;
4462 mqd->static_thread_mgmt01[0] = 0xffffffff;
4463 mqd->static_thread_mgmt01[1] = 0xffffffff;
4464 mqd->static_thread_mgmt23[0] = 0xffffffff;
4465 mqd->static_thread_mgmt23[1] = 0xffffffff;
4467 mutex_lock(&rdev->srbm_mutex);
4468 cik_srbm_select(rdev, rdev->ring[idx].me,
4469 rdev->ring[idx].pipe,
4470 rdev->ring[idx].queue, 0);
4472 /* disable wptr polling */
4473 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4474 tmp &= ~WPTR_POLL_EN;
4475 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4477 /* enable doorbell? */
4478 mqd->queue_state.cp_hqd_pq_doorbell_control =
4479 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4481 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4483 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4484 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4485 mqd->queue_state.cp_hqd_pq_doorbell_control);
4487 /* disable the queue if it's active */
4488 mqd->queue_state.cp_hqd_dequeue_request = 0;
4489 mqd->queue_state.cp_hqd_pq_rptr = 0;
4490 mqd->queue_state.cp_hqd_pq_wptr= 0;
4491 if (RREG32(CP_HQD_ACTIVE) & 1) {
4492 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4493 for (i = 0; i < rdev->usec_timeout; i++) {
4494 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4498 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4499 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4500 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4503 /* set the pointer to the MQD */
4504 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4505 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4506 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4507 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4508 /* set MQD vmid to 0 */
4509 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4510 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4511 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4513 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4514 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4515 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4516 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4517 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4518 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4520 /* set up the HQD, this is similar to CP_RB0_CNTL */
4521 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4522 mqd->queue_state.cp_hqd_pq_control &=
4523 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4525 mqd->queue_state.cp_hqd_pq_control |=
4526 order_base_2(rdev->ring[idx].ring_size / 8);
4527 mqd->queue_state.cp_hqd_pq_control |=
4528 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4530 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4532 mqd->queue_state.cp_hqd_pq_control &=
4533 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4534 mqd->queue_state.cp_hqd_pq_control |=
4535 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4536 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4538 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4540 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4542 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4543 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4544 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4545 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4546 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4547 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4549 /* set the wb address wether it's enabled or not */
4551 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4553 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4554 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4555 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4556 upper_32_bits(wb_gpu_addr) & 0xffff;
4557 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4558 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4559 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4560 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4562 /* enable the doorbell if requested */
4564 mqd->queue_state.cp_hqd_pq_doorbell_control =
4565 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4566 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4567 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4568 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4569 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4570 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4571 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4574 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4576 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4577 mqd->queue_state.cp_hqd_pq_doorbell_control);
4579 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4580 rdev->ring[idx].wptr = 0;
4581 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4582 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4583 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4584 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4586 /* set the vmid for the queue */
4587 mqd->queue_state.cp_hqd_vmid = 0;
4588 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4590 /* activate the queue */
4591 mqd->queue_state.cp_hqd_active = 1;
4592 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4594 cik_srbm_select(rdev, 0, 0, 0, 0);
4595 mutex_unlock(&rdev->srbm_mutex);
4597 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4598 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4600 rdev->ring[idx].ready = true;
4601 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4603 rdev->ring[idx].ready = false;
4609 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4611 cik_cp_gfx_enable(rdev, enable);
4612 cik_cp_compute_enable(rdev, enable);
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	/* load gfx ucode first, then compute; bail on the first failure */
	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* tear down both command processors */
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4635 static int cik_cp_resume(struct radeon_device *rdev)
4639 cik_enable_gui_idle_interrupt(rdev, false);
4641 r = cik_cp_load_microcode(rdev);
4645 r = cik_cp_gfx_resume(rdev);
4648 r = cik_cp_compute_resume(rdev);
4652 cik_enable_gui_idle_interrupt(rdev, true);
/* Dump the GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used before and after a GPU soft reset for diagnostics.
 */
4657 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4659 dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4660 RREG32(GRBM_STATUS));
4661 dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4662 RREG32(GRBM_STATUS2));
4663 dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4664 RREG32(GRBM_STATUS_SE0));
4665 dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4666 RREG32(GRBM_STATUS_SE1));
4667 dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4668 RREG32(GRBM_STATUS_SE2));
4669 dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4670 RREG32(GRBM_STATUS_SE3));
4671 dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4672 RREG32(SRBM_STATUS));
4673 dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4674 RREG32(SRBM_STATUS2));
4675 dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4676 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4677 dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4678 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4679 dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4680 dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4681 RREG32(CP_STALLED_STAT1));
4682 dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4683 RREG32(CP_STALLED_STAT2));
4684 dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4685 RREG32(CP_STALLED_STAT3));
4686 dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4687 RREG32(CP_CPF_BUSY_STAT));
4688 dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4689 RREG32(CP_CPF_STALLED_STAT1));
4690 dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4691 dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4692 dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4693 RREG32(CP_CPC_STALLED_STAT1));
4694 dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4698 * cik_gpu_check_soft_reset - check which blocks are busy
4700 * @rdev: radeon_device pointer
4702 * Check which blocks are busy and return the relevant reset
4703 * mask to be used by cik_gpu_soft_reset().
4704 * Returns a mask of the blocks to be reset.
4706 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
/* GRBM_STATUS: gfx pipeline busy bits -> GFX reset; CP bits -> CP reset */
4712 tmp = RREG32(GRBM_STATUS);
4713 if (tmp & (PA_BUSY | SC_BUSY |
4714 BCI_BUSY | SX_BUSY |
4715 TA_BUSY | VGT_BUSY |
4717 GDS_BUSY | SPI_BUSY |
4718 IA_BUSY | IA_BUSY_NO_DMA))
4719 reset_mask |= RADEON_RESET_GFX;
4721 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4722 reset_mask |= RADEON_RESET_CP;
/* GRBM_STATUS2: NOTE(review) the RLC-busy condition guarding the next
 * line is elided in this chunk */
4725 tmp = RREG32(GRBM_STATUS2);
4727 reset_mask |= RADEON_RESET_RLC;
4729 /* SDMA0_STATUS_REG */
4730 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4731 if (!(tmp & SDMA_IDLE))
4732 reset_mask |= RADEON_RESET_DMA;
4734 /* SDMA1_STATUS_REG */
4735 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4736 if (!(tmp & SDMA_IDLE))
4737 reset_mask |= RADEON_RESET_DMA1;
/* SRBM_STATUS2: second source of SDMA busy indications */
4740 tmp = RREG32(SRBM_STATUS2);
4741 if (tmp & SDMA_BUSY)
4742 reset_mask |= RADEON_RESET_DMA;
4744 if (tmp & SDMA1_BUSY)
4745 reset_mask |= RADEON_RESET_DMA1;
/* SRBM_STATUS: IH/SEM/GRBM/VMC/MC busy bits (some condition lines elided) */
4748 tmp = RREG32(SRBM_STATUS);
4751 reset_mask |= RADEON_RESET_IH;
4754 reset_mask |= RADEON_RESET_SEM;
4756 if (tmp & GRBM_RQ_PENDING)
4757 reset_mask |= RADEON_RESET_GRBM;
4760 reset_mask |= RADEON_RESET_VMC;
4762 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4763 MCC_BUSY | MCD_BUSY))
4764 reset_mask |= RADEON_RESET_MC;
4766 if (evergreen_is_display_hung(rdev))
4767 reset_mask |= RADEON_RESET_DISPLAY;
4769 /* Skip MC reset as it's mostly likely not hung, just busy */
4770 if (reset_mask & RADEON_RESET_MC) {
4771 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4772 reset_mask &= ~RADEON_RESET_MC;
4779 * cik_gpu_soft_reset - soft reset GPU
4781 * @rdev: radeon_device pointer
4782 * @reset_mask: mask of which blocks to reset
4784 * Soft reset the blocks specified in @reset_mask.
4786 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4788 struct evergreen_mc_save save;
4789 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4792 if (reset_mask == 0)
4795 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
/* Log pre-reset state, including any pending VM protection fault */
4797 cik_print_gpu_status_regs(rdev);
4798 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4799 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4800 dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4801 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4810 /* Disable GFX parsing/prefetching */
4811 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4813 /* Disable MEC parsing/prefetching */
4814 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
/* Halt the SDMA engines that are about to be reset (HALT-bit set on the
 * elided lines between read and write) */
4816 if (reset_mask & RADEON_RESET_DMA) {
4818 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4820 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4822 if (reset_mask & RADEON_RESET_DMA1) {
4824 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4826 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
/* Stop memory-controller clients and wait for the MC to go idle */
4829 evergreen_mc_stop(rdev, &save);
4830 if (evergreen_mc_wait_for_idle(rdev)) {
4831 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
/* Translate the block mask into GRBM/SRBM soft-reset bits */
4834 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4835 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4837 if (reset_mask & RADEON_RESET_CP) {
4838 grbm_soft_reset |= SOFT_RESET_CP;
4840 srbm_soft_reset |= SOFT_RESET_GRBM;
4843 if (reset_mask & RADEON_RESET_DMA)
4844 srbm_soft_reset |= SOFT_RESET_SDMA;
4846 if (reset_mask & RADEON_RESET_DMA1)
4847 srbm_soft_reset |= SOFT_RESET_SDMA1;
4849 if (reset_mask & RADEON_RESET_DISPLAY)
4850 srbm_soft_reset |= SOFT_RESET_DC;
4852 if (reset_mask & RADEON_RESET_RLC)
4853 grbm_soft_reset |= SOFT_RESET_RLC;
4855 if (reset_mask & RADEON_RESET_SEM)
4856 srbm_soft_reset |= SOFT_RESET_SEM;
4858 if (reset_mask & RADEON_RESET_IH)
4859 srbm_soft_reset |= SOFT_RESET_IH;
4861 if (reset_mask & RADEON_RESET_GRBM)
4862 srbm_soft_reset |= SOFT_RESET_GRBM;
4864 if (reset_mask & RADEON_RESET_VMC)
4865 srbm_soft_reset |= SOFT_RESET_VMC;
/* MC soft reset only on discrete parts; IGPs share the MC with the CPU */
4867 if (!(rdev->flags & RADEON_IS_IGP)) {
4868 if (reset_mask & RADEON_RESET_MC)
4869 srbm_soft_reset |= SOFT_RESET_MC;
/* Pulse the GRBM soft-reset bits: set, read back, clear, read back */
4872 if (grbm_soft_reset) {
4873 tmp = RREG32(GRBM_SOFT_RESET);
4874 tmp |= grbm_soft_reset;
4875 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4876 WREG32(GRBM_SOFT_RESET, tmp);
4877 tmp = RREG32(GRBM_SOFT_RESET);
4881 tmp &= ~grbm_soft_reset;
4882 WREG32(GRBM_SOFT_RESET, tmp);
4883 tmp = RREG32(GRBM_SOFT_RESET);
/* Same pulse sequence for the SRBM soft-reset bits */
4886 if (srbm_soft_reset) {
4887 tmp = RREG32(SRBM_SOFT_RESET);
4888 tmp |= srbm_soft_reset;
4889 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4890 WREG32(SRBM_SOFT_RESET, tmp);
4891 tmp = RREG32(SRBM_SOFT_RESET);
4895 tmp &= ~srbm_soft_reset;
4896 WREG32(SRBM_SOFT_RESET, tmp);
4897 tmp = RREG32(SRBM_SOFT_RESET);
4900 /* Wait a little for things to settle down */
4903 evergreen_mc_resume(rdev, &save);
4906 cik_print_gpu_status_regs(rdev);
/* GMCON register state saved across a pci config reset on KV/KB (IGP) parts.
 * NOTE(review): kv_save/restore below also use gmcon_misc and gmcon_misc3
 * members whose declarations are elided in this chunk.
 */
4909 struct kv_reset_save_regs {
4910 u32 gmcon_reng_execute;
/* Save the GMCON registers before a KV reset, then disable the restore
 * engine's power-up/reg-update triggers and stutter mode so the MC state
 * is not clobbered during the reset.
 */
4915 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4916 struct kv_reset_save_regs *save)
4918 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4919 save->gmcon_misc = RREG32(GMCON_MISC);
4920 save->gmcon_misc3 = RREG32(GMCON_MISC3);
4922 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4923 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4924 STCTRL_STUTTER_EN));
/* Replay a fixed GMCON PGFSM command sequence after a KV pci config reset,
 * then restore the GMCON registers saved by kv_save_regs_for_reset().
 * The magic CONFIG/WRITE values are a hardware-specified power-gating
 * state-machine program; do not reorder.
 */
4927 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4928 struct kv_reset_save_regs *save)
4932 WREG32(GMCON_PGFSM_WRITE, 0);
4933 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4935 for (i = 0; i < 5; i++)
4936 WREG32(GMCON_PGFSM_WRITE, 0);
4938 WREG32(GMCON_PGFSM_WRITE, 0);
4939 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4941 for (i = 0; i < 5; i++)
4942 WREG32(GMCON_PGFSM_WRITE, 0);
4944 WREG32(GMCON_PGFSM_WRITE, 0x210000);
4945 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4947 for (i = 0; i < 5; i++)
4948 WREG32(GMCON_PGFSM_WRITE, 0);
4950 WREG32(GMCON_PGFSM_WRITE, 0x21003);
4951 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4953 for (i = 0; i < 5; i++)
4954 WREG32(GMCON_PGFSM_WRITE, 0);
4956 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4957 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4959 for (i = 0; i < 5; i++)
4960 WREG32(GMCON_PGFSM_WRITE, 0);
4962 WREG32(GMCON_PGFSM_WRITE, 0);
4963 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4965 for (i = 0; i < 5; i++)
4966 WREG32(GMCON_PGFSM_WRITE, 0);
4968 WREG32(GMCON_PGFSM_WRITE, 0x420000);
4969 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4971 for (i = 0; i < 5; i++)
4972 WREG32(GMCON_PGFSM_WRITE, 0);
4974 WREG32(GMCON_PGFSM_WRITE, 0x120202);
4975 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4977 for (i = 0; i < 5; i++)
4978 WREG32(GMCON_PGFSM_WRITE, 0);
4980 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
4981 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
4983 for (i = 0; i < 5; i++)
4984 WREG32(GMCON_PGFSM_WRITE, 0);
4986 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
4987 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
4989 for (i = 0; i < 5; i++)
4990 WREG32(GMCON_PGFSM_WRITE, 0);
4992 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
4993 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
/* Restore saved GMCON state last, re-enabling what the save disabled */
4995 WREG32(GMCON_MISC3, save->gmcon_misc3);
4996 WREG32(GMCON_MISC, save->gmcon_misc);
4997 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
/* Full-chip reset via PCI config space: halt all engines, stop memory
 * access, reset through config space, then wait for the ASIC to come back
 * (CONFIG_MEMSIZE reads 0xffffffff while the chip is in reset).
 * On IGPs the GMCON state is saved/restored around the reset.
 */
5000 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5002 struct evergreen_mc_save save;
5003 struct kv_reset_save_regs kv_save = { 0 };
5006 dev_info(rdev->dev, "GPU pci config reset\n");
5014 /* Disable GFX parsing/prefetching */
5015 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5017 /* Disable MEC parsing/prefetching */
5018 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
/* Halt both SDMA engines (HALT-bit set on the elided lines) */
5021 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5023 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5025 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5027 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5028 /* XXX other engines? */
5030 /* halt the rlc, disable cp internal ints */
5035 /* disable mem access */
5036 evergreen_mc_stop(rdev, &save);
5037 if (evergreen_mc_wait_for_idle(rdev)) {
5038 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5041 if (rdev->flags & RADEON_IS_IGP)
5042 kv_save_regs_for_reset(rdev, &kv_save);
5045 pci_clear_master(rdev->pdev);
5047 radeon_pci_config_reset(rdev);
5051 /* wait for asic to come out of reset */
5052 for (i = 0; i < rdev->usec_timeout; i++) {
5053 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5058 /* does asic init need to be run first??? */
5059 if (rdev->flags & RADEON_IS_IGP)
5060 kv_restore_regs_for_reset(rdev, &kv_save)
5064 * cik_asic_reset - soft reset GPU
5066 * @rdev: radeon_device pointer
5068 * Look up which blocks are hung and attempt
5070 * Returns 0 for success.
5072 int cik_asic_reset(struct radeon_device *rdev)
5076 reset_mask = cik_gpu_check_soft_reset(rdev);
/* Mark engine hung in BIOS scratch regs while we attempt recovery */
5079 r600_set_bios_scratch_engine_hung(rdev, true);
5081 /* try soft reset */
5082 cik_gpu_soft_reset(rdev, reset_mask);
5084 reset_mask = cik_gpu_check_soft_reset(rdev);
5086 /* try pci config reset */
5087 if (reset_mask && radeon_hard_reset)
5088 cik_gpu_pci_config_reset(rdev);
5090 reset_mask = cik_gpu_check_soft_reset(rdev);
/* NOTE(review): the "if (!reset_mask)" guard for clearing the hung flag
 * is elided in this chunk */
5093 r600_set_bios_scratch_engine_hung(rdev, false);
5099 * cik_gfx_is_lockup - check if the 3D engine is locked up
5101 * @rdev: radeon_device pointer
5102 * @ring: radeon_ring structure holding ring information
5104 * Check if the 3D engine is locked up (CIK).
5105 * Returns true if the engine is locked, false if not.
5107 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5109 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
/* No gfx/compute/CP blocks busy -> refresh lockup tracking and report OK */
5111 if (!(reset_mask & (RADEON_RESET_GFX |
5112 RADEON_RESET_COMPUTE |
5113 RADEON_RESET_CP))) {
5114 radeon_ring_lockup_update(ring);
5117 /* force CP activities */
5118 radeon_ring_force_activity(rdev, ring);
5119 return radeon_ring_test_lockup(rdev, ring);
5124 * cik_mc_program - program the GPU memory controller
5126 * @rdev: radeon_device pointer
5128 * Set the location of vram, gart, and AGP in the GPU's
5129 * physical address space (CIK).
5131 static void cik_mc_program(struct radeon_device *rdev)
5133 struct evergreen_mc_save save;
5137 /* Initialize HDP */
5138 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5139 WREG32((0x2c14 + j), 0x00000000);
5140 WREG32((0x2c18 + j), 0x00000000);
5141 WREG32((0x2c1c + j), 0x00000000);
5142 WREG32((0x2c20 + j), 0x00000000);
5143 WREG32((0x2c24 + j), 0x00000000);
5145 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5147 evergreen_mc_stop(rdev, &save);
5148 if (radeon_mc_wait_for_idle(rdev)) {
5149 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5151 /* Lockout access through VGA aperture*/
5152 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5153 /* Update configuration */
5154 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5155 rdev->mc.vram_start >> 12);
5156 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5157 rdev->mc.vram_end >> 12);
5158 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5159 rdev->vram_scratch.gpu_addr >> 12);
/* FB_LOCATION: top 16 bits = vram end, bottom 16 = vram start (16MB units) */
5160 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5161 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5162 WREG32(MC_VM_FB_LOCATION, tmp);
5163 /* XXX double check these! */
5164 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5165 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5166 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
/* AGP unused on CIK: set BOT above TOP to disable the aperture */
5167 WREG32(MC_VM_AGP_BASE, 0);
5168 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5169 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5170 if (radeon_mc_wait_for_idle(rdev)) {
5171 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5173 evergreen_mc_resume(rdev, &save);
5174 /* we need to own VRAM, so turn off the VGA renderer here
5175 * to stop it overwriting our objects */
5176 rv515_vga_render_disable(rdev);
5180 * cik_mc_init - initialize the memory controller driver params
5182 * @rdev: radeon_device pointer
5184 * Look up the amount of vram, vram width, and decide how to place
5185 * vram and gart within the GPU's physical address space (CIK).
5186 * Returns 0 for success.
5188 static int cik_mc_init(struct radeon_device *rdev)
5191 int chansize, numchan;
5193 /* Get VRAM informations */
5194 rdev->mc.vram_is_ddr = true;
/* Channel size from MC_ARB_RAMCFG (assignments elided in this chunk) */
5195 tmp = RREG32(MC_ARB_RAMCFG);
5196 if (tmp & CHANSIZE_MASK) {
/* Channel count from MC_SHARED_CHMAP (switch cases elided in this chunk) */
5201 tmp = RREG32(MC_SHARED_CHMAP);
5202 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5232 rdev->mc.vram_width = numchan * chansize;
5233 /* Could aper size report 0 ? */
5234 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5235 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5236 /* size in MB on si */
5237 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5238 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5239 rdev->mc.visible_vram_size = rdev->mc.aper_size;
/* CIK reuses the SI vram/gtt placement logic */
5240 si_vram_gtt_location(rdev, &rdev->mc);
5241 radeon_update_bandwidth_info(rdev);
5248 * VMID 0 is the physical GPU addresses as used by the kernel.
5249 * VMIDs 1-15 are used for userspace clients and are handled
5250 * by the radeon vm/hsa code.
5253 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5255 * @rdev: radeon_device pointer
5257 * Flush the TLB for the VMID 0 page table (CIK).
5259 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5261 /* flush hdp cache */
5262 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5264 /* bits 0-15 are the VM contexts0-15 */
5265 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5269 * cik_pcie_gart_enable - gart enable
5271 * @rdev: radeon_device pointer
5273 * This sets up the TLBs, programs the page tables for VMID0,
5274 * sets up the hw for VMIDs 1-15 which are allocated on
5275 * demand, and sets up the global locations for the LDS, GDS,
5276 * and GPUVM for FSA64 clients (CIK).
5277 * Returns 0 for success, errors for failure.
5279 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5283 if (rdev->gart.robj == NULL) {
5284 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5287 r = radeon_gart_table_vram_pin(rdev);
5290 radeon_gart_restore(rdev);
5291 /* Setup TLB control */
5292 WREG32(MC_VM_MX_L1_TLB_CNTL,
5295 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5296 ENABLE_ADVANCED_DRIVER_MODEL |
5297 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5298 /* Setup L2 cache */
5299 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5300 ENABLE_L2_FRAGMENT_PROCESSING |
5301 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5302 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5303 EFFECTIVE_L2_QUEUE_SIZE(7) |
5304 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5305 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5306 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5307 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5308 /* setup context0 */
5309 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5310 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5311 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
/* Faults on context0 fall through to the dummy page */
5312 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5313 (u32)(rdev->dummy_page.addr >> 12));
5314 WREG32(VM_CONTEXT0_CNTL2, 0);
5315 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5316 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5322 /* empty context1-15 */
5323 /* FIXME start with 4G, once using 2 level pt switch to full
5326 /* set vm size, must be a multiple of 4 */
5327 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5328 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
/* Contexts 1-7 and 8-15 use different register banks for their PT base */
5329 for (i = 1; i < 16; i++) {
5331 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5332 rdev->gart.table_addr >> 12);
5334 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5335 rdev->gart.table_addr >> 12);
5338 /* enable context1-15 */
5339 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5340 (u32)(rdev->dummy_page.addr >> 12));
5341 WREG32(VM_CONTEXT1_CNTL2, 4);
5342 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5343 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5344 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5345 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5346 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5347 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5348 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5349 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5350 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5351 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5352 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5353 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5354 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
/* Kaveri-specific CHUB tweak (modification of tmp elided in this chunk) */
5356 if (rdev->family == CHIP_KAVERI) {
5357 u32 tmp = RREG32(CHUB_CONTROL);
5359 WREG32(CHUB_CONTROL, tmp);
5362 /* XXX SH_MEM regs */
5363 /* where to put LDS, scratch, GPUVM in FSA64 space */
5364 mutex_lock(&rdev->srbm_mutex);
5365 for (i = 0; i < 16; i++) {
5366 cik_srbm_select(rdev, 0, 0, 0, i);
5367 /* CP and shaders */
5368 WREG32(SH_MEM_CONFIG, 0);
5369 WREG32(SH_MEM_APE1_BASE, 1);
5370 WREG32(SH_MEM_APE1_LIMIT, 0);
5371 WREG32(SH_MEM_BASES, 0);
5373 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5374 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5375 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5376 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5377 /* XXX SDMA RLC - todo */
5379 cik_srbm_select(rdev, 0, 0, 0, 0);
5380 mutex_unlock(&rdev->srbm_mutex);
5382 cik_pcie_gart_tlb_flush(rdev);
5383 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5384 (unsigned)(rdev->mc.gtt_size >> 20),
5385 (unsigned long long)rdev->gart.table_addr);
5386 rdev->gart.ready = true;
5391 * cik_pcie_gart_disable - gart disable
5393 * @rdev: radeon_device pointer
5395 * This disables all VM page table (CIK).
5397 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5399 /* Disable all tables */
5400 WREG32(VM_CONTEXT0_CNTL, 0);
5401 WREG32(VM_CONTEXT1_CNTL, 0);
5402 /* Setup TLB control */
5403 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5404 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5405 /* Setup L2 cache */
5407 ENABLE_L2_FRAGMENT_PROCESSING |
5408 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5409 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5410 EFFECTIVE_L2_QUEUE_SIZE(7) |
5411 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5412 WREG32(VM_L2_CNTL2, 0);
5413 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5414 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
/* Release the page-table BO pinned by cik_pcie_gart_enable() */
5415 radeon_gart_table_vram_unpin(rdev);
5419 * cik_pcie_gart_fini - vm fini callback
5421 * @rdev: radeon_device pointer
5423 * Tears down the driver GART/VM setup (CIK).
5425 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5427 cik_pcie_gart_disable(rdev);
5428 radeon_gart_table_vram_free(rdev);
5429 radeon_gart_fini(rdev);
5434 * cik_ib_parse - vm ib_parse callback
5436 * @rdev: radeon_device pointer
5437 * @ib: indirect buffer pointer
5439 * CIK uses hw IB checking so this is a nop (CIK).
5441 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5448 * VMID 0 is the physical GPU addresses as used by the kernel.
5449 * VMIDs 1-15 are used for userspace clients and are handled
5450 * by the radeon vm/hsa code.
5453 * cik_vm_init - cik vm init callback
5455 * @rdev: radeon_device pointer
5457 * Inits cik specific vm parameters (number of VMs, base of vram for
5458 * VMIDs 1-15) (CIK).
5459 * Returns 0 for success.
5461 int cik_vm_init(struct radeon_device *rdev)
5464 rdev->vm_manager.nvm = 16;
5465 /* base offset of vram pages */
/* IGPs carve vram out of system memory; MC_VM_FB_OFFSET gives its base.
 * NOTE(review): the shift applied to tmp between these lines is elided. */
5466 if (rdev->flags & RADEON_IS_IGP) {
5467 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5469 rdev->vm_manager.vram_base_offset = tmp;
5471 rdev->vm_manager.vram_base_offset = 0;
5477 * cik_vm_fini - cik vm fini callback
5479 * @rdev: radeon_device pointer
5481 * Tear down any asic specific VM setup (CIK).
5483 void cik_vm_fini(struct radeon_device *rdev)
5488 * cik_vm_decode_fault - print human readable fault info
5490 * @rdev: radeon_device pointer
5491 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5492 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5494 * Print human readable fault information (CIK).
5496 static void cik_vm_decode_fault(struct radeon_device *rdev,
5497 u32 status, u32 addr, u32 mc_client)
5500 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5501 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
/* mc_client packs a 4-char ASCII block name, big-endian byte order */
5502 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5503 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
/* Hawaii uses a wider client-id field than the other CIK parts */
5505 if (rdev->family == CHIP_HAWAII)
5506 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5508 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5510 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5511 protections, vmid, addr,
5512 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5513 block, mc_client, mc_id);
5517 * cik_vm_flush - cik vm flush using the CP
5519 * @rdev: radeon_device pointer
5521 * Update the page table base and flush the VM TLB
5522 * using the CP (CIK).
5524 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5526 struct radeon_ring *ring = &rdev->ring[ridx];
/* Write the new page directory base for this VMID; ids 0-7 and 8-15 live
 * in different register banks */
5531 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5532 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5533 WRITE_DATA_DST_SEL(0)));
5535 radeon_ring_write(ring,
5536 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5538 radeon_ring_write(ring,
5539 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5541 radeon_ring_write(ring, 0);
5542 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5544 /* update SH_MEM_* regs */
/* Select the VMID via SRBM_GFX_CNTL before touching SH_MEM_* */
5545 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5546 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5547 WRITE_DATA_DST_SEL(0)));
5548 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5549 radeon_ring_write(ring, 0);
5550 radeon_ring_write(ring, VMID(vm->id));
5552 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5553 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5554 WRITE_DATA_DST_SEL(0)));
5555 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5556 radeon_ring_write(ring, 0);
5558 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5559 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5560 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5561 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
/* Switch SRBM back to VMID 0 */
5563 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5564 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5565 WRITE_DATA_DST_SEL(0)));
5566 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5567 radeon_ring_write(ring, 0);
5568 radeon_ring_write(ring, VMID(0));
/* Flush HDP cache via ring before invalidating the TLB */
5571 cik_hdp_flush_cp_ring_emit(rdev, ridx);
5573 /* bits 0-15 are the VM contexts0-15 */
5574 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5575 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5576 WRITE_DATA_DST_SEL(0)));
5577 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5578 radeon_ring_write(ring, 0);
5579 radeon_ring_write(ring, 1 << vm->id);
5581 /* compute doesn't have PFP */
5582 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5583 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5584 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5585 radeon_ring_write(ring, 0x0);
5591 * The RLC is a multi-purpose microengine that handles a
5592 * variety of functions, the most important of which is
5593 * the interrupt controller.
/* Mask/unmask the context busy/empty (GUI idle) interrupts on gfx ring 0. */
5595 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5598 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5601 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5603 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5604 WREG32(CP_INT_CNTL_RING0, tmp);
/* Toggle RLC load balancing (LBPW) via RLC_LB_CNTL. */
5607 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5611 tmp = RREG32(RLC_LB_CNTL);
5613 tmp |= LOAD_BALANCE_ENABLE;
5615 tmp &= ~LOAD_BALANCE_ENABLE;
5616 WREG32(RLC_LB_CNTL, tmp);
/* Wait (bounded by usec_timeout) for the RLC serdes CU masters on every
 * SE/SH, then the non-CU masters, to report idle.
 */
5619 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5624 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5625 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5626 cik_select_se_sh(rdev, i, j);
5627 for (k = 0; k < rdev->usec_timeout; k++) {
5628 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
/* Restore broadcast SE/SH selection */
5634 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5636 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5637 for (k = 0; k < rdev->usec_timeout; k++) {
5638 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/* Write @rlc back into RLC_CNTL. NOTE(review): the comparison against the
 * current value (guard between read and write) is elided in this chunk.
 */
5644 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5648 tmp = RREG32(RLC_CNTL);
5650 WREG32(RLC_CNTL, rlc);
/* Halt the RLC if it is running, wait for the GPM to go idle and the
 * serdes to settle; returns the original RLC_CNTL value so the caller
 * can restore it with cik_update_rlc().
 */
5653 static u32 cik_halt_rlc(struct radeon_device *rdev)
5657 orig = data = RREG32(RLC_CNTL);
5659 if (data & RLC_ENABLE) {
5662 data &= ~RLC_ENABLE;
5663 WREG32(RLC_CNTL, data);
5665 for (i = 0; i < rdev->usec_timeout; i++) {
5666 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5671 cik_wait_for_rlc_serdes(rdev);
/* Request RLC safe mode via RLC_GPR_REG2 and poll (bounded) until the RLC
 * acknowledges: GFX power/clock status asserted and the REQ bit cleared.
 */
5677 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5681 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5682 WREG32(RLC_GPR_REG2, tmp);
5684 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5685 for (i = 0; i < rdev->usec_timeout; i++) {
5686 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5691 for (i = 0; i < rdev->usec_timeout; i++) {
5692 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
/* Post the exit-safe-mode message to the RLC; no wait on this side. */
5698 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5702 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5703 WREG32(RLC_GPR_REG2, tmp);
5707 * cik_rlc_stop - stop the RLC ME
5709 * @rdev: radeon_device pointer
5711 * Halt the RLC ME (MicroEngine) (CIK).
5713 static void cik_rlc_stop(struct radeon_device *rdev)
5715 WREG32(RLC_CNTL, 0);
5717 cik_enable_gui_idle_interrupt(rdev, false);
5719 cik_wait_for_rlc_serdes(rdev);
5723 * cik_rlc_start - start the RLC ME
5725 * @rdev: radeon_device pointer
5727 * Unhalt the RLC ME (MicroEngine) (CIK).
5729 static void cik_rlc_start(struct radeon_device *rdev)
5731 WREG32(RLC_CNTL, RLC_ENABLE);
5733 cik_enable_gui_idle_interrupt(rdev, true);
5739 * cik_rlc_resume - setup the RLC hw
5741 * @rdev: radeon_device pointer
5743 * Initialize the RLC registers, load the ucode,
5744 * and start the RLC (CIK).
5745 * Returns 0 for success, -EINVAL if the ucode is not available.
5747 static int cik_rlc_resume(struct radeon_device *rdev)
5750 const __be32 *fw_data;
/* Per-family ucode sizes (case labels elided in this chunk) */
5755 switch (rdev->family) {
5759 size = BONAIRE_RLC_UCODE_SIZE;
5762 size = KV_RLC_UCODE_SIZE;
5765 size = KB_RLC_UCODE_SIZE;
/* Clear the low CGCG/CGLS enable bits before (re)loading the RLC */
5772 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5773 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5781 WREG32(RLC_LB_CNTR_INIT, 0);
5782 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5784 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5785 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5786 WREG32(RLC_LB_PARAMS, 0x00600408);
5787 WREG32(RLC_LB_CNTL, 0x80000004);
5789 WREG32(RLC_MC_CNTL, 0);
5790 WREG32(RLC_UCODE_CNTL, 0);
/* Stream the big-endian RLC ucode image into the GPM ucode window */
5792 fw_data = (const __be32 *)rdev->rlc_fw->data;
5793 WREG32(RLC_GPM_UCODE_ADDR, 0);
5794 for (i = 0; i < size; i++)
5795 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5796 WREG32(RLC_GPM_UCODE_ADDR, 0);
5798 /* XXX - find out what chips support lbpw */
5799 cik_enable_lbpw(rdev, false);
5801 if (rdev->family == CHIP_BONAIRE)
5802 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5804 cik_rlc_start(rdev);
/* Enable/disable coarse-grain clock gating (CGCG/CGLS). Enabling requires
 * the RLC to be halted while the serdes write masks are programmed, then
 * restored via cik_update_rlc(). RLC_CGCG_CGLS_CTRL is only rewritten if
 * changed (write guarded by the elided orig != data check).
 */
5809 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5811 u32 data, orig, tmp, tmp2;
5813 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5815 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5816 cik_enable_gui_idle_interrupt(rdev, true);
5818 tmp = cik_halt_rlc(rdev);
5820 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5821 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5822 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5823 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5824 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5826 cik_update_rlc(rdev, tmp);
5828 data |= CGCG_EN | CGLS_EN;
5830 cik_enable_gui_idle_interrupt(rdev, false);
/* Dummy reads to flush pending CB clock-gating state */
5832 RREG32(CB_CGTT_SCLK_CTRL);
5833 RREG32(CB_CGTT_SCLK_CTRL);
5834 RREG32(CB_CGTT_SCLK_CTRL);
5835 RREG32(CB_CGTT_SCLK_CTRL);
5837 data &= ~(CGCG_EN | CGLS_EN);
5841 WREG32(RLC_CGCG_CGLS_CTRL, data);
/* Enable/disable medium-grain clock gating (MGCG) plus the related
 * CP/RLC memory light-sleep and CGTS shader gating, per rdev->cg_flags.
 * Register writes are guarded by elided orig != data checks; the RLC is
 * halted around serdes mask programming in both directions.
 */
5845 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5847 u32 data, orig, tmp = 0;
5849 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5850 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5851 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5852 orig = data = RREG32(CP_MEM_SLP_CNTL);
5853 data |= CP_MEM_LS_EN;
5855 WREG32(CP_MEM_SLP_CNTL, data);
/* Clear the MGCG override (mask clear elided) */
5859 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5862 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5864 tmp = cik_halt_rlc(rdev);
5866 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5867 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5868 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5869 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5870 WREG32(RLC_SERDES_WR_CTRL, data);
5872 cik_update_rlc(rdev, tmp);
5874 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5875 orig = data = RREG32(CGTS_SM_CTRL_REG);
5876 data &= ~SM_MODE_MASK;
5877 data |= SM_MODE(0x2);
5878 data |= SM_MODE_ENABLE;
5879 data &= ~CGTS_OVERRIDE;
5880 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5881 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5882 data &= ~CGTS_LS_OVERRIDE;
5883 data &= ~ON_MONITOR_ADD_MASK;
5884 data |= ON_MONITOR_ADD_EN;
5885 data |= ON_MONITOR_ADD(0x96);
5887 WREG32(CGTS_SM_CTRL_REG, data);
/* Disable path: set overrides, turn off RLC/CP memory light sleep */
5890 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5893 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5895 data = RREG32(RLC_MEM_SLP_CNTL);
5896 if (data & RLC_MEM_LS_EN) {
5897 data &= ~RLC_MEM_LS_EN;
5898 WREG32(RLC_MEM_SLP_CNTL, data);
5901 data = RREG32(CP_MEM_SLP_CNTL);
5902 if (data & CP_MEM_LS_EN) {
5903 data &= ~CP_MEM_LS_EN;
5904 WREG32(CP_MEM_SLP_CNTL, data);
5907 orig = data = RREG32(CGTS_SM_CTRL_REG);
5908 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5910 WREG32(CGTS_SM_CTRL_REG, data);
5912 tmp = cik_halt_rlc(rdev);
5914 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5915 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5916 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5917 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5918 WREG32(RLC_SERDES_WR_CTRL, data);
5920 cik_update_rlc(rdev, tmp);
/* MC clock-gating control registers iterated by cik_enable_mc_ls() and
 * cik_enable_mc_mgcg() below; initializer list elided in this chunk.
 */
5924 static const u32 mc_cg_registers[] =
/* Set or clear MC_LS_ENABLE in every MC clock-gating register; the write
 * is guarded by an elided orig != data check.
 */
5937 static void cik_enable_mc_ls(struct radeon_device *rdev,
5943 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5944 orig = data = RREG32(mc_cg_registers[i]);
5945 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5946 data |= MC_LS_ENABLE;
5948 data &= ~MC_LS_ENABLE;
5950 WREG32(mc_cg_registers[i], data);
/* Toggle MC medium-grain clock gating: set/clear MC_CG_ENABLE in every
 * mc_cg_registers[] entry, gated on RADEON_CG_SUPPORT_MC_MGCG.
 * (Some lines elided in this excerpt.) */
5954 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5960 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5961 orig = data = RREG32(mc_cg_registers[i]);
5962 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5963 data |= MC_CG_ENABLE;
5965 data &= ~MC_CG_ENABLE;
5967 WREG32(mc_cg_registers[i], data);
/* Toggle SDMA medium-grain clock gating on both SDMA engines.  Enable path
 * writes 0x100 directly to SDMA0_CLK_CTRL of each engine; disable path is a
 * read-modify-write (mask lines elided in this excerpt). */
5971 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5976 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5977 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5978 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5980 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5983 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5985 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5988 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
/* Toggle SDMA memory light sleep via SDMA0_POWER_CNTL on both engines;
 * enable and disable branches each do paired RMW writes (the bit
 * set/clear lines between each read and write are elided here). */
5992 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5997 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5998 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6001 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6003 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6006 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6008 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6011 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6013 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6016 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
/* Toggle UVD medium-grain clock gating: RMW of UVD_CGC_MEM_CTRL (via the
 * UVD context-register accessors) and UVD_CGC_CTRL for both enable and
 * disable paths.  Bit-manipulation lines elided in this excerpt. */
6020 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6025 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6026 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6028 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6030 orig = data = RREG32(UVD_CGC_CTRL);
6033 WREG32(UVD_CGC_CTRL, data);
6035 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6037 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6039 orig = data = RREG32(UVD_CGC_CTRL);
6042 WREG32(UVD_CGC_CTRL, data);
/* Toggle BIF (bus interface) memory light sleep: set or clear the four
 * *_MEM_LS_EN bits in PCIE_CNTL2, gated on RADEON_CG_SUPPORT_BIF_LS. */
6046 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6051 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6053 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6054 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6055 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6057 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6058 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6061 WREG32_PCIE_PORT(PCIE_CNTL2, data);
/* Toggle HDP medium-grain clock gating.  Note inverted sense: enabling
 * gating CLEARS the CLOCK_GATING_DIS (disable) bit. */
6064 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6069 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6071 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6072 data &= ~CLOCK_GATING_DIS;
6074 data |= CLOCK_GATING_DIS;
6077 WREG32(HDP_HOST_PATH_CNTL, data);
/* Toggle HDP memory light sleep via HDP_LS_ENABLE in HDP_MEM_POWER_LS. */
6080 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6085 orig = data = RREG32(HDP_MEM_POWER_LS);
6087 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6088 data |= HDP_LS_ENABLE;
6090 data &= ~HDP_LS_ENABLE;
6093 WREG32(HDP_MEM_POWER_LS, data);
/* Dispatch clock-gating enable/disable to the per-block helpers.
 * @block is a bitmask of RADEON_CG_BLOCK_* values.  For GFX the MGCG/CGCG
 * ordering differs between enable (MGCG first) and disable (CGCG first),
 * and the GUI idle interrupt is masked around the transition. */
6096 void cik_update_cg(struct radeon_device *rdev,
6097 u32 block, bool enable)
6100 if (block & RADEON_CG_BLOCK_GFX) {
6101 cik_enable_gui_idle_interrupt(rdev, false);
6102 /* order matters! */
6104 cik_enable_mgcg(rdev, true);
6105 cik_enable_cgcg(rdev, true);
6107 cik_enable_cgcg(rdev, false);
6108 cik_enable_mgcg(rdev, false);
6110 cik_enable_gui_idle_interrupt(rdev, true);
6113 if (block & RADEON_CG_BLOCK_MC) {
6114 if (!(rdev->flags & RADEON_IS_IGP)) {
6115 cik_enable_mc_mgcg(rdev, enable);
6116 cik_enable_mc_ls(rdev, enable);
6120 if (block & RADEON_CG_BLOCK_SDMA) {
6121 cik_enable_sdma_mgcg(rdev, enable);
6122 cik_enable_sdma_mgls(rdev, enable);
6125 if (block & RADEON_CG_BLOCK_BIF) {
6126 cik_enable_bif_mgls(rdev, enable);
6129 if (block & RADEON_CG_BLOCK_UVD) {
6131 cik_enable_uvd_mgcg(rdev, enable);
6134 if (block & RADEON_CG_BLOCK_HDP) {
6135 cik_enable_hdp_mgcg(rdev, enable);
6136 cik_enable_hdp_ls(rdev, enable);
/* Enable clock gating at init: GFX first, UVD internal CG (shared SI
 * helper), then the remaining blocks in one call. */
6140 static void cik_init_cg(struct radeon_device *rdev)
6143 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6146 si_init_uvd_internal_cg(rdev);
6148 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6149 RADEON_CG_BLOCK_SDMA |
6150 RADEON_CG_BLOCK_BIF |
6151 RADEON_CG_BLOCK_UVD |
6152 RADEON_CG_BLOCK_HDP), true);
/* Tear down clock gating in the reverse order of cik_init_cg():
 * non-GFX blocks first, GFX last. */
6155 static void cik_fini_cg(struct radeon_device *rdev)
6157 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6158 RADEON_CG_BLOCK_SDMA |
6159 RADEON_CG_BLOCK_BIF |
6160 RADEON_CG_BLOCK_UVD |
6161 RADEON_CG_BLOCK_HDP), false);
6163 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
/* Toggle SMU clock slowdown on power-up in RLC_PG_CNTL, gated on
 * RADEON_PG_SUPPORT_RLC_SMU_HS. */
6166 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6171 orig = data = RREG32(RLC_PG_CNTL);
6172 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6173 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6175 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6177 WREG32(RLC_PG_CNTL, data);
/* Toggle SMU clock slowdown on power-down; mirror of the _on_pu variant. */
6180 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6185 orig = data = RREG32(RLC_PG_CNTL);
6186 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6187 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6189 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6191 WREG32(RLC_PG_CNTL, data);
/* Toggle CP power gating.  Inverted sense: enabling PG clears the
 * DISABLE_CP_PG bit in RLC_PG_CNTL. */
6194 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6198 orig = data = RREG32(RLC_PG_CNTL);
6199 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6200 data &= ~DISABLE_CP_PG;
6202 data |= DISABLE_CP_PG;
6204 WREG32(RLC_PG_CNTL, data);
/* Toggle GDS power gating; same inverted-bit pattern as cik_enable_cp_pg(). */
6207 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6211 orig = data = RREG32(RLC_PG_CNTL);
6212 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6213 data &= ~DISABLE_GDS_PG;
6215 data |= DISABLE_GDS_PG;
6217 WREG32(RLC_PG_CNTL, data);
/* CP power-gating table layout used by cik_init_cp_pg_table(): SIZE is
 * in dwords; OFFSETs are dword indices into the ucode images (used as
 * fw_data[table_offset + i] below). */
6220 #define CP_ME_TABLE_SIZE 96
6221 #define CP_ME_TABLE_OFFSET 2048
6222 #define CP_MEC_TABLE_OFFSET 4096
/* Copy the CP power-gating tables out of the CE/PFP/ME/MEC ucode images
 * into the RLC's cp_table buffer, converting from big-endian firmware
 * words to little-endian GPU memory.  max_me is bumped for KAVERI
 * (assignment elided in this excerpt); bails out if the table BO is not
 * mapped.  Early-return / closing-brace lines elided. */
6224 void cik_init_cp_pg_table(struct radeon_device *rdev)
6226 const __be32 *fw_data;
6227 volatile u32 *dst_ptr;
6228 int me, i, max_me = 4;
6232 if (rdev->family == CHIP_KAVERI)
6235 if (rdev->rlc.cp_table_ptr == NULL)
6238 /* write the cp table buffer */
6239 dst_ptr = rdev->rlc.cp_table_ptr;
6240 for (me = 0; me < max_me; me++) {
6242 fw_data = (const __be32 *)rdev->ce_fw->data;
6243 table_offset = CP_ME_TABLE_OFFSET;
6244 } else if (me == 1) {
6245 fw_data = (const __be32 *)rdev->pfp_fw->data;
6246 table_offset = CP_ME_TABLE_OFFSET;
6247 } else if (me == 2) {
6248 fw_data = (const __be32 *)rdev->me_fw->data;
6249 table_offset = CP_ME_TABLE_OFFSET;
6251 fw_data = (const __be32 *)rdev->mec_fw->data;
6252 table_offset = CP_MEC_TABLE_OFFSET;
6255 for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6256 dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6258 bo_offset += CP_ME_TABLE_SIZE;
/* Toggle GFX coarse-grain power gating: set/clear GFX_PG_ENABLE in
 * RLC_PG_CNTL and the auto-PG enable in RLC_AUTO_PG_CTRL.  The disable
 * path ends with a DB_RENDER_CONTROL read (its purpose/write elided in
 * this excerpt — NOTE(review): presumably a wake/flush read; confirm). */
6262 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6267 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6268 orig = data = RREG32(RLC_PG_CNTL);
6269 data |= GFX_PG_ENABLE;
6271 WREG32(RLC_PG_CNTL, data);
6273 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6276 WREG32(RLC_AUTO_PG_CTRL, data);
6278 orig = data = RREG32(RLC_PG_CNTL);
6279 data &= ~GFX_PG_ENABLE;
6281 WREG32(RLC_PG_CNTL, data);
6283 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6284 data &= ~AUTO_PG_EN;
6286 WREG32(RLC_AUTO_PG_CTRL, data);
6288 data = RREG32(DB_RENDER_CONTROL);
/* Return the active-CU bitmap for shader engine @se / shader array @sh:
 * reads the fixed and user CU config under an se/sh select window, then
 * restores broadcast select (0xffffffff).  The tmp/tmp1 combination and
 * per-CU mask build are elided in this excerpt; result is ~tmp masked to
 * max_cu_per_sh bits. */
6292 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6294 u32 mask = 0, tmp, tmp1;
6297 cik_select_se_sh(rdev, se, sh);
6298 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6299 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6300 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6307 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6312 return (~tmp) & mask;
/* Program the always-on CU mask (RLC_PG_AO_CU_MASK) and the max PG CU
 * count (RLC_MAX_PG_CU) from the per-se/sh active-CU bitmaps.
 * NOTE(review): cik_get_cu_active_bitmap() is re-invoked inside the k
 * loop (register reads + se/sh select each iteration); hoisting it per
 * (i, j) would be cheaper — left as-is since loop interior lines are
 * elided in this excerpt. */
6315 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6317 u32 i, j, k, active_cu_number = 0;
6318 u32 mask, counter, cu_bitmap;
6321 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6322 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6326 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6327 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6335 active_cu_number += counter;
6336 tmp |= (cu_bitmap << (i * 16 + j * 8));
6340 WREG32(RLC_PG_AO_CU_MASK, tmp);
6342 tmp = RREG32(RLC_MAX_PG_CU);
6343 tmp &= ~MAX_PU_CU_MASK;
6344 tmp |= MAX_PU_CU(active_cu_number);
6345 WREG32(RLC_MAX_PG_CU, tmp);
/* Toggle static per-CU medium-grain power gating in RLC_PG_CNTL. */
6348 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6353 orig = data = RREG32(RLC_PG_CNTL);
6354 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6355 data |= STATIC_PER_CU_PG_ENABLE;
6357 data &= ~STATIC_PER_CU_PG_ENABLE;
6359 WREG32(RLC_PG_CNTL, data);
/* Toggle dynamic per-CU medium-grain power gating in RLC_PG_CNTL. */
6362 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6367 orig = data = RREG32(RLC_PG_CNTL);
6368 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6369 data |= DYN_PER_CU_PG_ENABLE;
6371 data &= ~DYN_PER_CU_PG_ENABLE;
6373 WREG32(RLC_PG_CNTL, data);
/* RLC GPM scratch offsets used by cik_init_gfx_cgpg() below. */
6376 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6377 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
/* One-time GFX power-gating setup: publish the clear-state descriptor
 * (hi addr, lo addr, size — or three zeros if no cs_data) and the
 * save/restore register list into RLC GPM scratch, program the SR base
 * and CP table addresses (256-byte aligned, hence >> 8), then tune the
 * WPTR poll count and the PG delay / GRBM SGIT timers.  Several RMW
 * mask lines are elided in this excerpt. */
6379 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6384 if (rdev->rlc.cs_data) {
6385 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6386 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6387 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6388 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6390 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6391 for (i = 0; i < 3; i++)
6392 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6394 if (rdev->rlc.reg_list) {
6395 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6396 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6397 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6400 orig = data = RREG32(RLC_PG_CNTL);
6403 WREG32(RLC_PG_CNTL, data);
6405 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6406 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6408 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6409 data &= ~IDLE_POLL_COUNT_MASK;
6410 data |= IDLE_POLL_COUNT(0x60);
6411 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6414 WREG32(RLC_PG_DELAY, data);
6416 data = RREG32(RLC_PG_DELAY_2);
6419 WREG32(RLC_PG_DELAY_2, data);
6421 data = RREG32(RLC_AUTO_PG_CTRL);
6422 data &= ~GRBM_REG_SGIT_MASK;
6423 data |= GRBM_REG_SGIT(0x700);
6424 WREG32(RLC_AUTO_PG_CTRL, data);
/* Apply all three GFX power-gating modes (coarse, static MG, dynamic MG)
 * in one call. */
6428 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6430 cik_enable_gfx_cgpg(rdev, enable);
6431 cik_enable_gfx_static_mgpg(rdev, enable);
6432 cik_enable_gfx_dynamic_mgpg(rdev, enable);
/* Compute the dword count of the clear-state buffer that
 * cik_get_csb_buffer() will emit: preamble + context control + 2 dwords
 * per SECT_CONTEXT extent header plus its registers + raster config +
 * end-of-clear-state + clear-state packet.  Returns 0 when there is no
 * cs_data (count accumulation lines elided in this excerpt). */
6435 u32 cik_get_csb_size(struct radeon_device *rdev)
6438 const struct cs_section_def *sect = NULL;
6439 const struct cs_extent_def *ext = NULL;
6441 if (rdev->rlc.cs_data == NULL)
6444 /* begin clear state */
6446 /* context control state */
6449 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6450 for (ext = sect->section; ext->extent != NULL; ++ext) {
6451 if (sect->id == SECT_CONTEXT)
6452 count += 2 + ext->reg_count;
6457 /* pa_sc_raster_config/pa_sc_raster_config1 */
6459 /* end clear state */
/* Fill @buffer with the clear-state packet stream sized by
 * cik_get_csb_size(): clear-state preamble, context control, every
 * SECT_CONTEXT extent as a SET_CONTEXT_REG packet, the per-family
 * PA_SC_RASTER_CONFIG pair, then end-of-clear-state and CLEAR_STATE.
 * The buffer lives in GPU-visible memory, so every dword must be stored
 * little-endian.
 *
 * FIX: the two raw raster-config literals (0x3a00161a / 0x0000002e)
 * were stored without cpu_to_le32(), unlike every other store in this
 * function — wrong byte order on big-endian hosts.  Now swapped like
 * the rest.  (Some case labels / closing braces elided in this excerpt;
 * all other lines unchanged.) */
6467 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6470 const struct cs_section_def *sect = NULL;
6471 const struct cs_extent_def *ext = NULL;
6473 if (rdev->rlc.cs_data == NULL)
6478 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6479 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6481 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6482 buffer[count++] = cpu_to_le32(0x80000000);
6483 buffer[count++] = cpu_to_le32(0x80000000);
6485 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6486 for (ext = sect->section; ext->extent != NULL; ++ext) {
6487 if (sect->id == SECT_CONTEXT) {
6489 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6490 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6491 for (i = 0; i < ext->reg_count; i++)
6492 buffer[count++] = cpu_to_le32(ext->extent[i]);
6499 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6500 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6501 switch (rdev->family) {
6503 buffer[count++] = cpu_to_le32(0x16000012);
6504 buffer[count++] = cpu_to_le32(0x00000000);
6507 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6508 buffer[count++] = cpu_to_le32(0x00000000);
6511 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6512 buffer[count++] = cpu_to_le32(0x00000000);
6515 buffer[count++] = cpu_to_le32(0x3a00161a);
6516 buffer[count++] = cpu_to_le32(0x0000002e);
6519 buffer[count++] = cpu_to_le32(0x00000000);
6520 buffer[count++] = cpu_to_le32(0x00000000);
6524 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6525 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6527 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6528 buffer[count++] = cpu_to_le32(0);
/* Enable power gating at init if any pg_flags are set: SMU clock
 * slowdown both directions, then (when GFX PG is supported) the GFX PG
 * init sequence, CP/GDS PG, the AO CU mask, and finally the GFX PG
 * modes themselves. */
6531 static void cik_init_pg(struct radeon_device *rdev)
6533 if (rdev->pg_flags) {
6534 cik_enable_sck_slowdown_on_pu(rdev, true);
6535 cik_enable_sck_slowdown_on_pd(rdev, true);
6536 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6537 cik_init_gfx_cgpg(rdev);
6538 cik_enable_cp_pg(rdev, true);
6539 cik_enable_gds_pg(rdev, true);
6541 cik_init_ao_cu_mask(rdev);
6542 cik_update_gfx_pg(rdev, true);
/* Disable power gating in reverse of cik_init_pg(): GFX PG modes first,
 * then CP/GDS PG if GFX PG was supported. */
6546 static void cik_fini_pg(struct radeon_device *rdev)
6548 if (rdev->pg_flags) {
6549 cik_update_gfx_pg(rdev, false);
6550 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6551 cik_enable_cp_pg(rdev, false);
6552 cik_enable_gds_pg(rdev, false);
6559 * Starting with r6xx, interrupts are handled via a ring buffer.
6560 * Ring buffers are areas of GPU accessible memory that the GPU
6561 * writes interrupt vectors into and the host reads vectors out of.
6562 * There is a rptr (read pointer) that determines where the
6563 * host is currently reading, and a wptr (write pointer)
6564 * which determines where the GPU has written. When the
6565 * pointers are equal, the ring is idle. When the GPU
6566 * writes vectors to the ring buffer, it increments the
6567 * wptr. When there is an interrupt, the host then starts
6568 * fetching commands and processing them until the pointers are
6569 * equal again at which point it updates the rptr.
6573 * cik_enable_interrupts - Enable the interrupt ring buffer
6575 * @rdev: radeon_device pointer
6577 * Enable the interrupt ring buffer (CIK).
6579 static void cik_enable_interrupts(struct radeon_device *rdev)
6581 u32 ih_cntl = RREG32(IH_CNTL);
6582 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6584 ih_cntl |= ENABLE_INTR;
6585 ih_rb_cntl |= IH_RB_ENABLE;
6586 WREG32(IH_CNTL, ih_cntl);
6587 WREG32(IH_RB_CNTL, ih_rb_cntl);
/* record IH state so cik_irq_set() knows interrupts may be programmed */
6588 rdev->ih.enabled = true;
6592 * cik_disable_interrupts - Disable the interrupt ring buffer
6594 * @rdev: radeon_device pointer
6596 * Disable the interrupt ring buffer (CIK).
6598 static void cik_disable_interrupts(struct radeon_device *rdev)
6600 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6601 u32 ih_cntl = RREG32(IH_CNTL);
6603 ih_rb_cntl &= ~IH_RB_ENABLE;
6604 ih_cntl &= ~ENABLE_INTR;
6605 WREG32(IH_RB_CNTL, ih_rb_cntl);
6606 WREG32(IH_CNTL, ih_cntl);
6607 /* set rptr, wptr to 0 */
6608 WREG32(IH_RB_RPTR, 0);
6609 WREG32(IH_RB_WPTR, 0);
6610 rdev->ih.enabled = false;
6615 * cik_disable_interrupt_state - Disable all interrupt sources
6617 * @rdev: radeon_device pointer
6619 * Clear all interrupt enable bits used by the driver (CIK): gfx ring,
6619 * both SDMA engines, all eight compute pipe controls, GRBM, all CRTC
6619 * vblank/vline masks, DAC autodetect, and the six HPD pads (keeping
6619 * only the HPD polarity bits).
6621 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6626 tmp = RREG32(CP_INT_CNTL_RING0) &
6627 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6628 WREG32(CP_INT_CNTL_RING0, tmp);
6630 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6631 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6632 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6633 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6634 /* compute queues */
6635 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6636 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6637 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6638 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6639 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6640 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6641 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6642 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6644 WREG32(GRBM_INT_CNTL, 0);
6645 /* vline/vblank, etc. */
6646 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6647 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6648 if (rdev->num_crtc >= 4) {
6649 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6650 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6652 if (rdev->num_crtc >= 6) {
6653 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6654 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6658 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6660 /* digital hotplug */
6661 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6662 WREG32(DC_HPD1_INT_CONTROL, tmp);
6663 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6664 WREG32(DC_HPD2_INT_CONTROL, tmp);
6665 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6666 WREG32(DC_HPD3_INT_CONTROL, tmp);
6667 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6668 WREG32(DC_HPD4_INT_CONTROL, tmp);
6669 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6670 WREG32(DC_HPD5_INT_CONTROL, tmp);
6671 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6672 WREG32(DC_HPD6_INT_CONTROL, tmp);
6677 * cik_irq_init - init and enable the interrupt ring
6679 * @rdev: radeon_device pointer
6681 * Allocate a ring buffer for the interrupt controller,
6682 * enable the RLC, disable interrupts, enable the IH
6683 * ring buffer and enable it (CIK).
6684 * Called at device load and resume.
6685 * Returns 0 for success, errors for failure.
6687 static int cik_irq_init(struct radeon_device *rdev)
6691 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6694 ret = r600_ih_ring_alloc(rdev);
6699 cik_disable_interrupts(rdev);
6702 ret = cik_rlc_resume(rdev);
6704 r600_ih_ring_fini(rdev);
6708 /* setup interrupt control */
6709 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6710 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6711 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6712 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6713 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6715 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6716 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6717 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6718 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6720 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6721 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6723 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6724 IH_WPTR_OVERFLOW_CLEAR |
6727 if (rdev->wb.enabled)
6728 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6730 /* set the writeback address whether it's enabled or not */
6731 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6732 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6734 WREG32(IH_RB_CNTL, ih_rb_cntl);
6736 /* set rptr, wptr to 0 */
6737 WREG32(IH_RB_RPTR, 0);
6738 WREG32(IH_RB_WPTR, 0);
6740 /* Default settings for IH_CNTL (disabled at first) */
6741 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6742 /* RPTR_REARM only works if msi's are enabled */
6743 if (rdev->msi_enabled)
6744 ih_cntl |= RPTR_REARM;
6745 WREG32(IH_CNTL, ih_cntl);
6747 /* force the active interrupt state to all disabled */
6748 cik_disable_interrupt_state(rdev);
6750 pci_set_master(rdev->pdev);
6753 cik_enable_interrupts(rdev);
6759 * cik_irq_set - enable/disable interrupt sources
6761 * @rdev: radeon_device pointer
6763 * Enable interrupt sources on the GPU (vblanks, hpd,
6765 * Returns 0 for success, errors for failure.
/* FIX: the "pipe 3" arms of all four compute-ring switches below used
 * cp_m1p2 / cp_m2p2 (copy-paste from the pipe-2 arms), so a fence
 * interrupt on compute pipe 3 was never enabled while pipe 2 was enabled
 * spuriously.  They now set cp_m1p3 / cp_m2p3.  All other lines are
 * unchanged (case labels / break / return lines elided in this excerpt). */
6767 int cik_irq_set(struct radeon_device *rdev)
6770 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6771 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6772 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6773 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6774 u32 grbm_int_cntl = 0;
6775 u32 dma_cntl, dma_cntl1;
6778 if (!rdev->irq.installed) {
6779 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6782 /* don't enable anything if the ih is disabled */
6783 if (!rdev->ih.enabled) {
6784 cik_disable_interrupts(rdev);
6785 /* force the active interrupt state to all disabled */
6786 cik_disable_interrupt_state(rdev);
6790 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6791 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6792 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6794 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6795 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6796 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6797 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6798 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6799 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6801 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6802 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6804 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6805 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6806 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6807 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6808 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6809 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6810 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6811 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6813 if (rdev->flags & RADEON_IS_IGP)
6814 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6815 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6817 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6818 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6820 /* enable CP interrupts on all rings */
6821 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6822 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6823 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6825 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6826 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6827 DRM_DEBUG("si_irq_set: sw int cp1\n");
6828 if (ring->me == 1) {
6829 switch (ring->pipe) {
6831 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6834 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6837 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6840 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6843 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6846 } else if (ring->me == 2) {
6847 switch (ring->pipe) {
6849 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6852 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6855 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6858 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6861 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6865 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6868 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6869 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6870 DRM_DEBUG("si_irq_set: sw int cp2\n");
6871 if (ring->me == 1) {
6872 switch (ring->pipe) {
6874 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6877 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6880 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6883 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6886 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6889 } else if (ring->me == 2) {
6890 switch (ring->pipe) {
6892 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6895 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6898 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6901 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6904 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6908 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6912 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6913 DRM_DEBUG("cik_irq_set: sw int dma\n");
6914 dma_cntl |= TRAP_ENABLE;
6917 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6918 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6919 dma_cntl1 |= TRAP_ENABLE;
6922 if (rdev->irq.crtc_vblank_int[0] ||
6923 atomic_read(&rdev->irq.pflip[0])) {
6924 DRM_DEBUG("cik_irq_set: vblank 0\n");
6925 crtc1 |= VBLANK_INTERRUPT_MASK;
6927 if (rdev->irq.crtc_vblank_int[1] ||
6928 atomic_read(&rdev->irq.pflip[1])) {
6929 DRM_DEBUG("cik_irq_set: vblank 1\n");
6930 crtc2 |= VBLANK_INTERRUPT_MASK;
6932 if (rdev->irq.crtc_vblank_int[2] ||
6933 atomic_read(&rdev->irq.pflip[2])) {
6934 DRM_DEBUG("cik_irq_set: vblank 2\n");
6935 crtc3 |= VBLANK_INTERRUPT_MASK;
6937 if (rdev->irq.crtc_vblank_int[3] ||
6938 atomic_read(&rdev->irq.pflip[3])) {
6939 DRM_DEBUG("cik_irq_set: vblank 3\n");
6940 crtc4 |= VBLANK_INTERRUPT_MASK;
6942 if (rdev->irq.crtc_vblank_int[4] ||
6943 atomic_read(&rdev->irq.pflip[4])) {
6944 DRM_DEBUG("cik_irq_set: vblank 4\n");
6945 crtc5 |= VBLANK_INTERRUPT_MASK;
6947 if (rdev->irq.crtc_vblank_int[5] ||
6948 atomic_read(&rdev->irq.pflip[5])) {
6949 DRM_DEBUG("cik_irq_set: vblank 5\n");
6950 crtc6 |= VBLANK_INTERRUPT_MASK;
6952 if (rdev->irq.hpd[0]) {
6953 DRM_DEBUG("cik_irq_set: hpd 1\n");
6954 hpd1 |= DC_HPDx_INT_EN;
6956 if (rdev->irq.hpd[1]) {
6957 DRM_DEBUG("cik_irq_set: hpd 2\n");
6958 hpd2 |= DC_HPDx_INT_EN;
6960 if (rdev->irq.hpd[2]) {
6961 DRM_DEBUG("cik_irq_set: hpd 3\n");
6962 hpd3 |= DC_HPDx_INT_EN;
6964 if (rdev->irq.hpd[3]) {
6965 DRM_DEBUG("cik_irq_set: hpd 4\n");
6966 hpd4 |= DC_HPDx_INT_EN;
6968 if (rdev->irq.hpd[4]) {
6969 DRM_DEBUG("cik_irq_set: hpd 5\n");
6970 hpd5 |= DC_HPDx_INT_EN;
6972 if (rdev->irq.hpd[5]) {
6973 DRM_DEBUG("cik_irq_set: hpd 6\n");
6974 hpd6 |= DC_HPDx_INT_EN;
6977 if (rdev->irq.dpm_thermal) {
6978 DRM_DEBUG("dpm thermal\n");
6979 if (rdev->flags & RADEON_IS_IGP)
6980 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6982 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6985 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6987 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6988 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6990 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6991 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6992 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6993 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6994 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6995 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6996 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6997 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6999 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7001 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7002 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7003 if (rdev->num_crtc >= 4) {
7004 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7005 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7007 if (rdev->num_crtc >= 6) {
7008 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7009 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7012 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7013 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7014 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7015 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7016 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7017 WREG32(DC_HPD6_INT_CONTROL, hpd6);
7019 if (rdev->flags & RADEON_IS_IGP)
7020 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7022 WREG32_SMC(CG_THERMAL_INT, thermal_int);
7028 * cik_irq_ack - ack interrupt sources
7030 * @rdev: radeon_device pointer
7032 * Ack interrupt sources on the GPU (vblanks, hpd,
7033 * etc.) (CIK). Certain interrupts sources are sw
7034 * generated and do not require an explicit ack.
/* FIX: the HPD6 ack path read DC_HPD5_INT_CONTROL before OR-ing in the
 * ack bit and writing DC_HPD6_INT_CONTROL, clobbering HPD6's control
 * register with HPD5's state.  It now reads DC_HPD6_INT_CONTROL.  All
 * other lines unchanged. */
7036 static inline void cik_irq_ack(struct radeon_device *rdev)
7040 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7041 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7042 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7043 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7044 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7045 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7046 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7048 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7049 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7050 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7051 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7052 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7053 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7054 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7055 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7057 if (rdev->num_crtc >= 4) {
7058 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7059 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7060 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7061 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7062 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7063 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7064 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7065 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7068 if (rdev->num_crtc >= 6) {
7069 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7070 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7071 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7072 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7073 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7074 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7075 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7076 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7079 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7080 tmp = RREG32(DC_HPD1_INT_CONTROL);
7081 tmp |= DC_HPDx_INT_ACK;
7082 WREG32(DC_HPD1_INT_CONTROL, tmp);
7084 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7085 tmp = RREG32(DC_HPD2_INT_CONTROL);
7086 tmp |= DC_HPDx_INT_ACK;
7087 WREG32(DC_HPD2_INT_CONTROL, tmp);
7089 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7090 tmp = RREG32(DC_HPD3_INT_CONTROL);
7091 tmp |= DC_HPDx_INT_ACK;
7092 WREG32(DC_HPD3_INT_CONTROL, tmp);
7094 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7095 tmp = RREG32(DC_HPD4_INT_CONTROL);
7096 tmp |= DC_HPDx_INT_ACK;
7097 WREG32(DC_HPD4_INT_CONTROL, tmp);
7099 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7100 tmp = RREG32(DC_HPD5_INT_CONTROL);
7101 tmp |= DC_HPDx_INT_ACK;
7102 WREG32(DC_HPD5_INT_CONTROL, tmp);
7104 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7105 tmp = RREG32(DC_HPD6_INT_CONTROL);
7106 tmp |= DC_HPDx_INT_ACK;
7107 WREG32(DC_HPD6_INT_CONTROL, tmp);
7112 * cik_irq_disable - disable interrupts
7114 * @rdev: radeon_device pointer
7116 * Disable interrupts on the hw (CIK), then clear every per-source
7116 * enable bit (a settle delay between the two steps is elided in this
7116 * excerpt).
7118 static void cik_irq_disable(struct radeon_device *rdev)
7120 cik_disable_interrupts(rdev);
7121 /* Wait and acknowledge irq */
7124 cik_disable_interrupt_state(rdev);
7128 * cik_irq_suspend - disable interrupts for suspend
7130 * @rdev: radeon_device pointer
7132 * Disable interrupts and stop the RLC (CIK).
7135 static void cik_irq_suspend(struct radeon_device *rdev)
7137 cik_irq_disable(rdev);
7142 * cik_irq_fini - tear down interrupt support
7144 * @rdev: radeon_device pointer
7146 * Disable interrupts on the hw and free the IH ring
7148 * Used for driver unload.
7150 static void cik_irq_fini(struct radeon_device *rdev)
7152 cik_irq_suspend(rdev);
7153 r600_ih_ring_fini(rdev);
7157 * cik_get_ih_wptr - get the IH ring buffer wptr
7159 * @rdev: radeon_device pointer
7161 * Get the IH ring buffer wptr from either the register
7162 * or the writeback memory buffer (CIK). Also check for
7163 * ring buffer overflow and deal with it.
7164 * Used by cik_irq_process().
7165 * Returns the value of the wptr.
7167 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7171 if (rdev->wb.enabled)
7172 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7174 wptr = RREG32(IH_RB_WPTR);
7176 if (wptr & RB_OVERFLOW) {
7177 /* When a ring buffer overflow happen start parsing interrupt
7178 * from the last not overwritten vector (wptr + 16). Hopefully
7179 * this should allow us to catchup.
7181 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7182 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7183 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7184 tmp = RREG32(IH_RB_CNTL);
7185 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7186 WREG32(IH_RB_CNTL, tmp);
7188 return (wptr & rdev->ih.ptr_mask);
7192 * Each IV ring entry is 128 bits:
7193 * [7:0] - interrupt source id
7195 * [59:32] - interrupt source data
7196 * [63:60] - reserved
7199 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7200 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7201 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7202 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7203 * PIPE_ID - ME0 0=3D
7204 * - ME1&2 compute dispatcher (4 pipes each)
7206 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7207 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7208 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7211 * [127:96] - reserved
7214 * cik_irq_process - interrupt handler
7216 * @rdev: radeon_device pointer
7218 * Interrupt handler (CIK). Walk the IH ring,
7219 * ack interrupts and schedule work to handle
7221 * Returns irq process return code.
7223 int cik_irq_process(struct radeon_device *rdev)
/* NOTE(review): this extract has many structural lines elided throughout the
 * function body (the opening/closing braces, the `switch (src_id)` header and
 * inner `switch (src_data)` headers, `break;` statements, early `return`s,
 * and several variable declarations such as wptr/rptr/ring_index). The code
 * below is kept byte-identical to the extract; consult the full source for
 * the complete control flow. */
7225 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7226 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7229 u32 src_id, src_data, ring_id;
7230 u8 me_id, pipe_id, queue_id;
/* deferred-work flags: set while draining the ring, acted on at the end */
7232 bool queue_hotplug = false;
7233 bool queue_reset = false;
7234 u32 addr, status, mc_client;
7235 bool queue_thermal = false;
7237 if (!rdev->ih.enabled || rdev->shutdown)
7240 wptr = cik_get_ih_wptr(rdev);
7243 /* is somebody else already processing irqs? */
7244 if (atomic_xchg(&rdev->ih.lock, 1))
7247 rptr = rdev->ih.rptr;
7248 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7250 /* Order reading of wptr vs. reading of IH ring data */
7253 /* display interrupts */
7256 while (rptr != wptr) {
7257 /* wptr/rptr are in bytes! */
7258 ring_index = rptr / 4;
/* decode one 128-bit IV ring entry (see format comment above) */
7259 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7260 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7261 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7264 case 1: /* D1 vblank/vline */
7266 case 0: /* D1 vblank */
7267 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7268 if (rdev->irq.crtc_vblank_int[0]) {
7269 drm_handle_vblank(rdev->ddev, 0);
7270 rdev->pm.vblank_sync = true;
7271 wake_up(&rdev->irq.vblank_queue);
7273 if (atomic_read(&rdev->irq.pflip[0]))
7274 radeon_crtc_handle_flip(rdev, 0);
7275 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7276 DRM_DEBUG("IH: D1 vblank\n");
7279 case 1: /* D1 vline */
7280 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7281 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7282 DRM_DEBUG("IH: D1 vline\n");
7286 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7290 case 2: /* D2 vblank/vline */
7292 case 0: /* D2 vblank */
7293 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7294 if (rdev->irq.crtc_vblank_int[1]) {
7295 drm_handle_vblank(rdev->ddev, 1);
7296 rdev->pm.vblank_sync = true;
7297 wake_up(&rdev->irq.vblank_queue);
7299 if (atomic_read(&rdev->irq.pflip[1]))
7300 radeon_crtc_handle_flip(rdev, 1);
7301 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7302 DRM_DEBUG("IH: D2 vblank\n");
7305 case 1: /* D2 vline */
7306 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7307 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7308 DRM_DEBUG("IH: D2 vline\n");
7312 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7316 case 3: /* D3 vblank/vline */
7318 case 0: /* D3 vblank */
7319 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7320 if (rdev->irq.crtc_vblank_int[2]) {
7321 drm_handle_vblank(rdev->ddev, 2);
7322 rdev->pm.vblank_sync = true;
7323 wake_up(&rdev->irq.vblank_queue);
7325 if (atomic_read(&rdev->irq.pflip[2]))
7326 radeon_crtc_handle_flip(rdev, 2);
7327 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7328 DRM_DEBUG("IH: D3 vblank\n");
7331 case 1: /* D3 vline */
7332 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7333 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7334 DRM_DEBUG("IH: D3 vline\n");
7338 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7342 case 4: /* D4 vblank/vline */
7344 case 0: /* D4 vblank */
7345 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7346 if (rdev->irq.crtc_vblank_int[3]) {
7347 drm_handle_vblank(rdev->ddev, 3);
7348 rdev->pm.vblank_sync = true;
7349 wake_up(&rdev->irq.vblank_queue);
7351 if (atomic_read(&rdev->irq.pflip[3]))
7352 radeon_crtc_handle_flip(rdev, 3);
7353 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7354 DRM_DEBUG("IH: D4 vblank\n");
7357 case 1: /* D4 vline */
7358 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7359 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7360 DRM_DEBUG("IH: D4 vline\n");
7364 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7368 case 5: /* D5 vblank/vline */
7370 case 0: /* D5 vblank */
7371 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7372 if (rdev->irq.crtc_vblank_int[4]) {
7373 drm_handle_vblank(rdev->ddev, 4);
7374 rdev->pm.vblank_sync = true;
7375 wake_up(&rdev->irq.vblank_queue);
7377 if (atomic_read(&rdev->irq.pflip[4]))
7378 radeon_crtc_handle_flip(rdev, 4);
7379 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7380 DRM_DEBUG("IH: D5 vblank\n");
7383 case 1: /* D5 vline */
7384 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7385 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7386 DRM_DEBUG("IH: D5 vline\n");
7390 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7394 case 6: /* D6 vblank/vline */
7396 case 0: /* D6 vblank */
7397 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7398 if (rdev->irq.crtc_vblank_int[5]) {
7399 drm_handle_vblank(rdev->ddev, 5);
7400 rdev->pm.vblank_sync = true;
7401 wake_up(&rdev->irq.vblank_queue);
7403 if (atomic_read(&rdev->irq.pflip[5]))
7404 radeon_crtc_handle_flip(rdev, 5);
7405 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7406 DRM_DEBUG("IH: D6 vblank\n");
7409 case 1: /* D6 vline */
7410 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7411 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7412 DRM_DEBUG("IH: D6 vline\n");
7416 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7420 case 42: /* HPD hotplug */
7423 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7424 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7425 queue_hotplug = true;
7426 DRM_DEBUG("IH: HPD1\n");
7430 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7431 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7432 queue_hotplug = true;
7433 DRM_DEBUG("IH: HPD2\n");
7437 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7438 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7439 queue_hotplug = true;
7440 DRM_DEBUG("IH: HPD3\n");
7444 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7445 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7446 queue_hotplug = true;
7447 DRM_DEBUG("IH: HPD4\n");
7451 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7452 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7453 queue_hotplug = true;
7454 DRM_DEBUG("IH: HPD5\n");
7458 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7459 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7460 queue_hotplug = true;
7461 DRM_DEBUG("IH: HPD6\n");
7465 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
/* NOTE(review): the case label for the UVD source id appears to have been
 * elided above this DRM_DEBUG — confirm against the full source. */
7470 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7471 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
/* VM protection fault: capture fault registers, decode, then clear */
7475 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7476 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7477 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7478 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7479 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7481 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7483 cik_vm_decode_fault(rdev, status, addr, mc_client);
7484 /* reset addr and status */
7485 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7487 case 176: /* GFX RB CP_INT */
7488 case 177: /* GFX IB CP_INT */
7489 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7491 case 181: /* CP EOP event */
7492 DRM_DEBUG("IH: CP EOP\n");
7493 /* XXX check the bitfield order! */
7494 me_id = (ring_id & 0x60) >> 5;
7495 pipe_id = (ring_id & 0x18) >> 3;
7496 queue_id = (ring_id & 0x7) >> 0;
7499 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
/* NOTE(review): `&` of two `==` comparisons below is bitwise but each operand
 * is 0/1, so it behaves like `&&` here; `&&` would be the conventional form. */
7503 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7504 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7505 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7506 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7510 case 184: /* CP Privileged reg access */
7511 DRM_ERROR("Illegal register access in command stream\n");
7512 /* XXX check the bitfield order! */
7513 me_id = (ring_id & 0x60) >> 5;
7514 pipe_id = (ring_id & 0x18) >> 3;
7515 queue_id = (ring_id & 0x7) >> 0;
7518 /* This results in a full GPU reset, but all we need to do is soft
7519 * reset the CP for gfx
7533 case 185: /* CP Privileged inst */
7534 DRM_ERROR("Illegal instruction in command stream\n");
7535 /* XXX check the bitfield order! */
7536 me_id = (ring_id & 0x60) >> 5;
7537 pipe_id = (ring_id & 0x18) >> 3;
7538 queue_id = (ring_id & 0x7) >> 0;
7541 /* This results in a full GPU reset, but all we need to do is soft
7542 * reset the CP for gfx
7556 case 224: /* SDMA trap event */
7557 /* XXX check the bitfield order! */
7558 me_id = (ring_id & 0x3) >> 0;
7559 queue_id = (ring_id & 0xc) >> 2;
7560 DRM_DEBUG("IH: SDMA trap\n");
7565 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7578 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7590 case 230: /* thermal low to high */
7591 DRM_DEBUG("IH: thermal low to high\n");
7592 rdev->pm.dpm.thermal.high_to_low = false;
7593 queue_thermal = true;
7595 case 231: /* thermal high to low */
7596 DRM_DEBUG("IH: thermal high to low\n");
7597 rdev->pm.dpm.thermal.high_to_low = true;
7598 queue_thermal = true;
7600 case 233: /* GUI IDLE */
7601 DRM_DEBUG("IH: GUI idle\n");
7603 case 241: /* SDMA Privileged inst */
7604 case 247: /* SDMA Privileged inst */
7605 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7606 /* XXX check the bitfield order! */
7607 me_id = (ring_id & 0x3) >> 0;
7608 queue_id = (ring_id & 0xc) >> 2;
7643 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7647 /* wptr/rptr are in bytes! */
7649 rptr &= rdev->ih.ptr_mask;
/* ring drained: kick off any deferred work flagged above */
7652 schedule_work(&rdev->hotplug_work);
7654 schedule_work(&rdev->reset_work);
7656 schedule_work(&rdev->pm.dpm.thermal.work);
7657 rdev->ih.rptr = rptr;
7658 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7659 atomic_set(&rdev->ih.lock, 0);
7661 /* make sure wptr hasn't changed while processing */
7662 wptr = cik_get_ih_wptr(rdev);
7670 * startup/shutdown callbacks
7673 * cik_startup - program the asic to a functional state
7675 * @rdev: radeon_device pointer
7677 * Programs the asic to a functional state (CIK).
7678 * Called by cik_init() and cik_resume().
7679 * Returns 0 for success, error for failure.
7681 static int cik_startup(struct radeon_device *rdev)
/* NOTE(review): this extract has elided the `if (r) return r;` style error
 * checks (and braces) that follow most of the calls below; consult the full
 * source for the complete error paths. */
7683 struct radeon_ring *ring;
7686 /* enable pcie gen2/3 link */
7687 cik_pcie_gen3_enable(rdev);
7689 cik_program_aspm(rdev);
7691 /* scratch needs to be initialized before MC */
7692 r = r600_vram_scratch_init(rdev);
7696 cik_mc_program(rdev);
7698 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7699 r = ci_mc_load_microcode(rdev);
7701 DRM_ERROR("Failed to load MC firmware!\n");
7706 r = cik_pcie_gart_enable(rdev);
7711 /* allocate rlc buffers */
7712 if (rdev->flags & RADEON_IS_IGP) {
7713 if (rdev->family == CHIP_KAVERI) {
7714 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7715 rdev->rlc.reg_list_size =
7716 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7718 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7719 rdev->rlc.reg_list_size =
7720 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7723 rdev->rlc.cs_data = ci_cs_data;
7724 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7725 r = sumo_rlc_init(rdev);
7727 DRM_ERROR("Failed to init rlc BOs!\n");
7731 /* allocate wb buffer */
7732 r = radeon_wb_init(rdev);
7736 /* allocate mec buffers */
7737 r = cik_mec_init(rdev);
7739 DRM_ERROR("Failed to init MEC BOs!\n");
/* start fence processing for each ring the driver uses */
7743 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7745 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7749 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7751 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7755 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7757 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7761 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7763 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7767 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7769 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7773 r = radeon_uvd_resume(rdev);
7775 r = uvd_v4_2_resume(rdev);
7777 r = radeon_fence_driver_start_ring(rdev,
7778 R600_RING_TYPE_UVD_INDEX);
7780 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
/* UVD failed to come up: disable its ring rather than failing startup */
7784 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7787 if (!rdev->irq.installed) {
7788 r = radeon_irq_kms_init(rdev);
7793 r = cik_irq_init(rdev);
7795 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7796 radeon_irq_kms_fini(rdev);
7801 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7802 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7803 PACKET3(PACKET3_NOP, 0x3FFF));
7807 /* set up the compute queues */
7808 /* type-2 packets are deprecated on MEC, use type-3 instead */
7809 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7810 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7811 PACKET3(PACKET3_NOP, 0x3FFF));
7814 ring->me = 1; /* first MEC */
7815 ring->pipe = 0; /* first pipe */
7816 ring->queue = 0; /* first queue */
7817 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7819 /* type-2 packets are deprecated on MEC, use type-3 instead */
7820 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7821 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7822 PACKET3(PACKET3_NOP, 0x3FFF));
7825 /* dGPU only have 1 MEC */
7826 ring->me = 1; /* first MEC */
7827 ring->pipe = 0; /* first pipe */
7828 ring->queue = 1; /* second queue */
7829 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7831 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7832 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7833 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7837 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7838 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7839 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7843 r = cik_cp_resume(rdev);
7847 r = cik_sdma_resume(rdev);
7851 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7852 if (ring->ring_size) {
7853 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7856 r = uvd_v1_0_init(rdev);
7858 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7861 r = radeon_ib_pool_init(rdev);
7863 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7867 r = radeon_vm_manager_init(rdev);
7869 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7873 r = dce6_audio_init(rdev);
7881 * cik_resume - resume the asic to a functional state
7883 * @rdev: radeon_device pointer
7885 * Programs the asic to a functional state (CIK).
7887 * Returns 0 for success, error for failure.
7889 int cik_resume(struct radeon_device *rdev)
/* NOTE(review): braces, the `int r;` declaration and the return statements
 * are elided from this extract. */
/* post card (re-runs the ASIC init tables after suspend) */
7894 atom_asic_init(rdev->mode_info.atom_context);
7896 /* init golden registers */
7897 cik_init_golden_registers(rdev);
7899 radeon_pm_resume(rdev);
7901 rdev->accel_working = true;
7902 r = cik_startup(rdev);
7904 DRM_ERROR("cik startup failed on resume\n");
7905 rdev->accel_working = false;
7914 * cik_suspend - suspend the asic
7916 * @rdev: radeon_device pointer
7918 * Bring the chip into a state suitable for suspend (CIK).
7919 * Called at suspend.
7920 * Returns 0 for success.
7922 int cik_suspend(struct radeon_device *rdev)
/* Tear-down order: stop consumers (pm, audio, vm), halt the engines
 * (CP, SDMA, UVD), then disable irqs, writeback and the GART last. */
7924 radeon_pm_suspend(rdev);
7925 dce6_audio_fini(rdev);
7926 radeon_vm_manager_fini(rdev);
7927 cik_cp_enable(rdev, false);
7928 cik_sdma_enable(rdev, false);
7929 uvd_v1_0_fini(rdev);
7930 radeon_uvd_suspend(rdev);
7933 cik_irq_suspend(rdev);
7934 radeon_wb_disable(rdev);
7935 cik_pcie_gart_disable(rdev);
7939 /* Plan is to move initialization in that function and use
7940 * helper function so that radeon_device_init pretty much
7941 * do nothing more than calling asic specific function. This
7942 * should also allow to remove a bunch of callback function
7946 * cik_init - asic specific driver and hw init
7948 * @rdev: radeon_device pointer
7950 * Setup asic specific driver variables and program the hw
7951 * to a functional state (CIK).
7952 * Called at driver startup.
7953 * Returns 0 for success, errors for failure.
7955 int cik_init(struct radeon_device *rdev)
/* NOTE(review): error-check lines (`if (r) return r;`) and braces are elided
 * from this extract throughout. */
7957 struct radeon_ring *ring;
7961 if (!radeon_get_bios(rdev)) {
7962 if (ASIC_IS_AVIVO(rdev))
7965 /* Must be an ATOMBIOS */
7966 if (!rdev->is_atom_bios) {
/* NOTE(review): "cayman GPU" in this message looks copy-pasted from the NI
 * code path — this is the CIK init path. Left as-is (runtime string). */
7967 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7970 r = radeon_atombios_init(rdev);
7974 /* Post card if necessary */
7975 if (!radeon_card_posted(rdev)) {
7977 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7980 DRM_INFO("GPU not posted. posting now...\n");
7981 atom_asic_init(rdev->mode_info.atom_context);
7983 /* init golden registers */
7984 cik_init_golden_registers(rdev);
7985 /* Initialize scratch registers */
7986 cik_scratch_init(rdev);
7987 /* Initialize surface registers */
7988 radeon_surface_init(rdev);
7989 /* Initialize clocks */
7990 radeon_get_clock_info(rdev->ddev);
7993 r = radeon_fence_driver_init(rdev);
7997 /* initialize memory controller */
7998 r = cik_mc_init(rdev);
8001 /* Memory manager */
8002 r = radeon_bo_init(rdev);
/* IGP parts have no MC/SMC firmware; dGPUs require them in addition */
8006 if (rdev->flags & RADEON_IS_IGP) {
8007 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8008 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8009 r = cik_init_microcode(rdev);
8011 DRM_ERROR("Failed to load firmware!\n");
8016 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8017 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8019 r = cik_init_microcode(rdev);
8021 DRM_ERROR("Failed to load firmware!\n");
8027 /* Initialize power management */
8028 radeon_pm_init(rdev);
8030 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8031 ring->ring_obj = NULL;
8032 r600_ring_init(rdev, ring, 1024 * 1024);
8034 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8035 ring->ring_obj = NULL;
8036 r600_ring_init(rdev, ring, 1024 * 1024);
8037 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8041 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8042 ring->ring_obj = NULL;
8043 r600_ring_init(rdev, ring, 1024 * 1024);
8044 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8048 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8049 ring->ring_obj = NULL;
8050 r600_ring_init(rdev, ring, 256 * 1024);
8052 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8053 ring->ring_obj = NULL;
8054 r600_ring_init(rdev, ring, 256 * 1024);
8056 r = radeon_uvd_init(rdev);
8058 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8059 ring->ring_obj = NULL;
8060 r600_ring_init(rdev, ring, 4096);
8063 rdev->ih.ring_obj = NULL;
8064 r600_ih_ring_init(rdev, 64 * 1024);
8066 r = r600_pcie_gart_init(rdev);
8070 rdev->accel_working = true;
8071 r = cik_startup(rdev);
/* startup failed: unwind everything and continue without acceleration */
8073 dev_err(rdev->dev, "disabling GPU acceleration\n");
8075 cik_sdma_fini(rdev);
8077 sumo_rlc_fini(rdev);
8079 radeon_wb_fini(rdev);
8080 radeon_ib_pool_fini(rdev);
8081 radeon_vm_manager_fini(rdev);
8082 radeon_irq_kms_fini(rdev);
8083 cik_pcie_gart_fini(rdev);
8084 rdev->accel_working = false;
8087 /* Don't start up if the MC ucode is missing.
8088 * The default clocks and voltages before the MC ucode
8089 * is loaded are not sufficient for advanced operations.
8091 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
/* NOTE(review): "NI+" in this message also looks inherited from the NI
 * driver; left as-is (runtime string). */
8092 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8100 * cik_fini - asic specific driver and hw fini
8102 * @rdev: radeon_device pointer
8104 * Tear down the asic specific driver variables and program the hw
8105 * to an idle state (CIK).
8106 * Called at driver unload.
8108 void cik_fini(struct radeon_device *rdev)
/* Reverse of cik_init()/cik_startup(): engines first, then kernel-side
 * managers, then memory/BIOS state. Some calls (e.g. cik_cp_fini) appear
 * elided from this extract. */
8110 radeon_pm_fini(rdev);
8112 cik_sdma_fini(rdev);
8116 sumo_rlc_fini(rdev);
8118 radeon_wb_fini(rdev);
8119 radeon_vm_manager_fini(rdev);
8120 radeon_ib_pool_fini(rdev);
8121 radeon_irq_kms_fini(rdev);
8122 uvd_v1_0_fini(rdev);
8123 radeon_uvd_fini(rdev);
8124 cik_pcie_gart_fini(rdev);
8125 r600_vram_scratch_fini(rdev);
8126 radeon_gem_fini(rdev);
8127 radeon_fence_driver_fini(rdev);
8128 radeon_bo_fini(rdev);
8129 radeon_atombios_fini(rdev);
/* dce8_program_fmt - program the FMT (bit-depth/dither) block for an encoder's
 * crtc based on the monitor bpc and the connector's dither property.
 * NOTE(review): the `switch (bpc)` header, its case labels (the three
 * dither/truncate groups below presumably correspond to 6/8/10 bpc) and the
 * else branches are elided from this extract — confirm against the full
 * source. */
8134 void dce8_program_fmt(struct drm_encoder *encoder)
8136 struct drm_device *dev = encoder->dev;
8137 struct radeon_device *rdev = dev->dev_private;
8138 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8139 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8140 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8143 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8146 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8147 bpc = radeon_get_monitor_bpc(connector);
8148 dither = radeon_connector->dither;
8151 /* LVDS/eDP FMT is set up by atom */
8152 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8155 /* not needed for analog */
8156 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8157 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8165 if (dither == RADEON_FMT_DITHER_ENABLE)
8166 /* XXX sort out optimal dither settings */
8167 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8168 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8170 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8173 if (dither == RADEON_FMT_DITHER_ENABLE)
8174 /* XXX sort out optimal dither settings */
8175 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8176 FMT_RGB_RANDOM_ENABLE |
8177 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8179 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8182 if (dither == RADEON_FMT_DITHER_ENABLE)
8183 /* XXX sort out optimal dither settings */
8184 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8185 FMT_RGB_RANDOM_ENABLE |
8186 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8188 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8195 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8198 /* display watermark setup */
8200 * dce8_line_buffer_adjust - Set up the line buffer
8202 * @rdev: radeon_device pointer
8203 * @radeon_crtc: the selected display controller
8204 * @mode: the current display mode on the selected display
8207 * Set up the line buffer allocation for
8208 * the selected display controller (CIK).
8209 * Returns the line buffer size in pixels.
8211 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8212 struct radeon_crtc *radeon_crtc,
8213 struct drm_display_mode *mode)
/* NOTE(review): the `tmp = ...;` assignments inside the hdisplay if-chain and
 * the final switch that converts `tmp` into a pixel count are elided from
 * this extract. */
8215 u32 tmp, buffer_alloc, i;
8216 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8219 * There are 6 line buffers, one for each display controller.
8220 * There are 3 partitions per LB. Select the number of partitions
8221 * to enable based on the display width. For display widths larger
8222 * than 4096, you need to use 2 display controllers and combine
8223 * them using the stereo blender.
8225 if (radeon_crtc->base.enabled && mode) {
8226 if (mode->crtc_hdisplay < 1920) {
8229 } else if (mode->crtc_hdisplay < 2560) {
8232 } else if (mode->crtc_hdisplay < 4096) {
8234 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8236 DRM_DEBUG_KMS("Mode too big for LB!\n");
8238 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8245 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8246 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8248 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8249 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
/* poll (bounded by usec_timeout) until the DMIF buffer allocation sticks */
8250 for (i = 0; i < rdev->usec_timeout; i++) {
8251 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8252 DMIF_BUFFERS_ALLOCATED_COMPLETED)
8257 if (radeon_crtc->base.enabled && mode) {
8269 /* controller not enabled, so no lb used */
8274 * cik_get_number_of_dram_channels - get the number of dram channels
8276 * @rdev: radeon_device pointer
8278 * Look up the number of video ram channels (CIK).
8279 * Used for display watermark bandwidth calculations
8280 * Returns the number of dram channels
8282 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8284 u32 tmp = RREG32(MC_SHARED_CHMAP);
/* NOTE(review): the case arms mapping the NOOFCHAN field to a channel count
 * are elided from this extract. */
8286 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8309 struct dce8_wm_params {
8310 u32 dram_channels; /* number of dram channels */
8311 u32 yclk; /* bandwidth per dram data pin in kHz */
8312 u32 sclk; /* engine clock in kHz */
8313 u32 disp_clk; /* display clock in kHz */
8314 u32 src_width; /* viewport width */
8315 u32 active_time; /* active display time in ns */
8316 u32 blank_time; /* blank time in ns */
8317 bool interlaced; /* mode is interlaced */
8318 fixed20_12 vsc; /* vertical scale ratio */
8319 u32 num_heads; /* number of active crtcs */
8320 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8321 u32 lb_size; /* line buffer allocated to pipe */
8322 u32 vtaps; /* vertical scaler taps */
8326 * dce8_dram_bandwidth - get the dram bandwidth
8328 * @wm: watermark calculation data
8330 * Calculate the raw dram bandwidth (CIK).
8331 * Used for display watermark bandwidth calculations
8332 * Returns the dram bandwidth in MBytes/s
8334 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8336 /* Calculate raw DRAM Bandwidth */
8337 fixed20_12 dram_efficiency; /* 0.7 */
8338 fixed20_12 yclk, dram_channels, bandwidth;
8341 a.full = dfixed_const(1000);
8342 yclk.full = dfixed_const(wm->yclk);
8343 yclk.full = dfixed_div(yclk, a);
8344 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8345 a.full = dfixed_const(10);
8346 dram_efficiency.full = dfixed_const(7);
8347 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8348 bandwidth.full = dfixed_mul(dram_channels, yclk);
8349 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8351 return dfixed_trunc(bandwidth);
8355 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8357 * @wm: watermark calculation data
8359 * Calculate the dram bandwidth used for display (CIK).
8360 * Used for display watermark bandwidth calculations
8361 * Returns the dram bandwidth for display in MBytes/s
8363 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8365 /* Calculate DRAM Bandwidth and the part allocated to display. */
8366 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8367 fixed20_12 yclk, dram_channels, bandwidth;
8370 a.full = dfixed_const(1000);
8371 yclk.full = dfixed_const(wm->yclk);
8372 yclk.full = dfixed_div(yclk, a);
8373 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8374 a.full = dfixed_const(10);
8375 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8376 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8377 bandwidth.full = dfixed_mul(dram_channels, yclk);
8378 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8380 return dfixed_trunc(bandwidth);
8384 * dce8_data_return_bandwidth - get the data return bandwidth
8386 * @wm: watermark calculation data
8388 * Calculate the data return bandwidth used for display (CIK).
8389 * Used for display watermark bandwidth calculations
8390 * Returns the data return bandwidth in MBytes/s
8392 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8394 /* Calculate the display Data return Bandwidth */
8395 fixed20_12 return_efficiency; /* 0.8 */
8396 fixed20_12 sclk, bandwidth;
8399 a.full = dfixed_const(1000);
8400 sclk.full = dfixed_const(wm->sclk);
8401 sclk.full = dfixed_div(sclk, a);
8402 a.full = dfixed_const(10);
8403 return_efficiency.full = dfixed_const(8);
8404 return_efficiency.full = dfixed_div(return_efficiency, a);
8405 a.full = dfixed_const(32);
8406 bandwidth.full = dfixed_mul(a, sclk);
8407 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8409 return dfixed_trunc(bandwidth);
8413 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8415 * @wm: watermark calculation data
8417 * Calculate the dmif bandwidth used for display (CIK).
8418 * Used for display watermark bandwidth calculations
8419 * Returns the dmif bandwidth in MBytes/s
8421 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8423 /* Calculate the DMIF Request Bandwidth */
8424 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8425 fixed20_12 disp_clk, bandwidth;
8428 a.full = dfixed_const(1000);
8429 disp_clk.full = dfixed_const(wm->disp_clk);
8430 disp_clk.full = dfixed_div(disp_clk, a);
8431 a.full = dfixed_const(32);
8432 b.full = dfixed_mul(a, disp_clk);
8434 a.full = dfixed_const(10);
8435 disp_clk_request_efficiency.full = dfixed_const(8);
8436 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8438 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8440 return dfixed_trunc(bandwidth);
8444 * dce8_available_bandwidth - get the min available bandwidth
8446 * @wm: watermark calculation data
8448 * Calculate the min available bandwidth used for display (CIK).
8449 * Used for display watermark bandwidth calculations
8450 * Returns the min available bandwidth in MBytes/s
8452 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8454 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8455 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8456 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8457 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8459 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8463 * dce8_average_bandwidth - get the average available bandwidth
8465 * @wm: watermark calculation data
8467 * Calculate the average available bandwidth used for display (CIK).
8468 * Used for display watermark bandwidth calculations
8469 * Returns the average available bandwidth in MBytes/s
8471 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8473 /* Calculate the display mode Average Bandwidth
8474 * DisplayMode should contain the source and destination dimensions,
8478 fixed20_12 line_time;
8479 fixed20_12 src_width;
8480 fixed20_12 bandwidth;
8483 a.full = dfixed_const(1000);
8484 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8485 line_time.full = dfixed_div(line_time, a);
8486 bpp.full = dfixed_const(wm->bytes_per_pixel);
8487 src_width.full = dfixed_const(wm->src_width);
8488 bandwidth.full = dfixed_mul(src_width, bpp);
8489 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8490 bandwidth.full = dfixed_div(bandwidth, line_time);
8492 return dfixed_trunc(bandwidth);
8496 * dce8_latency_watermark - get the latency watermark
8498 * @wm: watermark calculation data
8500 * Calculate the latency watermark (CIK).
8501 * Used for display watermark bandwidth calculations
8502 * Returns the latency watermark in ns
8504 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8506 /* First calculate the latency in ns */
8507 u32 mc_latency = 2000; /* 2000 ns. */
8508 u32 available_bandwidth = dce8_available_bandwidth(wm);
8509 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8510 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8511 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8512 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8513 (wm->num_heads * cursor_line_pair_return_time);
8514 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8515 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8516 u32 tmp, dmif_size = 12288;
8519 if (wm->num_heads == 0)
8522 a.full = dfixed_const(2);
8523 b.full = dfixed_const(1);
8524 if ((wm->vsc.full > a.full) ||
8525 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8527 ((wm->vsc.full >= a.full) && wm->interlaced))
8528 max_src_lines_per_dst_line = 4;
8530 max_src_lines_per_dst_line = 2;
8532 a.full = dfixed_const(available_bandwidth);
8533 b.full = dfixed_const(wm->num_heads);
8534 a.full = dfixed_div(a, b);
8536 b.full = dfixed_const(mc_latency + 512);
8537 c.full = dfixed_const(wm->disp_clk);
8538 b.full = dfixed_div(b, c);
8540 c.full = dfixed_const(dmif_size);
8541 b.full = dfixed_div(c, b);
8543 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8545 b.full = dfixed_const(1000);
8546 c.full = dfixed_const(wm->disp_clk);
8547 b.full = dfixed_div(c, b);
8548 c.full = dfixed_const(wm->bytes_per_pixel);
8549 b.full = dfixed_mul(b, c);
8551 lb_fill_bw = min(tmp, dfixed_trunc(b));
8553 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8554 b.full = dfixed_const(1000);
8555 c.full = dfixed_const(lb_fill_bw);
8556 b.full = dfixed_div(c, b);
8557 a.full = dfixed_div(a, b);
8558 line_fill_time = dfixed_trunc(a);
8560 if (line_fill_time < wm->active_time)
8563 return latency + (line_fill_time - wm->active_time);
8568 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8569 * average and available dram bandwidth
8571 * @wm: watermark calculation data
8573 * Check if the display average bandwidth fits in the display
8574 * dram bandwidth (CIK).
8575 * Used for display watermark bandwidth calculations
8576 * Returns true if the display fits, false if not.
8578 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8580 if (dce8_average_bandwidth(wm) <=
8581 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8588 * dce8_average_bandwidth_vs_available_bandwidth - check
8589 * average and available bandwidth
8591 * @wm: watermark calculation data
8593 * Check if the display average bandwidth fits in the display
8594 * available bandwidth (CIK).
8595 * Used for display watermark bandwidth calculations
8596 * Returns true if the display fits, false if not.
8598 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8600 if (dce8_average_bandwidth(wm) <=
8601 (dce8_available_bandwidth(wm) / wm->num_heads))
8608 * dce8_check_latency_hiding - check latency hiding
8610 * @wm: watermark calculation data
8612 * Check latency hiding (CIK).
8613 * Used for display watermark bandwidth calculations
8614 * Returns true if the display fits, false if not.
8616 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8618 u32 lb_partitions = wm->lb_size / wm->src_width;
8619 u32 line_time = wm->active_time + wm->blank_time;
8620 u32 latency_tolerant_lines;
8624 a.full = dfixed_const(1);
8625 if (wm->vsc.full > a.full)
8626 latency_tolerant_lines = 1;
8628 if (lb_partitions <= (wm->vtaps + 1))
8629 latency_tolerant_lines = 1;
8631 latency_tolerant_lines = 2;
8634 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8636 if (dce8_latency_watermark(wm) <= latency_hiding)
8643 * dce8_program_watermarks - program display watermarks
8645 * @rdev: radeon_device pointer
8646 * @radeon_crtc: the selected display controller
8647 * @lb_size: line buffer size
8648 * @num_heads: number of display controllers in use
8650 * Calculate and program the display watermarks for the
8651 * selected display controller (CIK).
8653 static void dce8_program_watermarks(struct radeon_device *rdev,
8654 struct radeon_crtc *radeon_crtc,
8655 u32 lb_size, u32 num_heads)
8657 struct drm_display_mode *mode = &radeon_crtc->base.mode;
8658 struct dce8_wm_params wm_low, wm_high;
8661 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8664 if (radeon_crtc->base.enabled && num_heads && mode) {
8665 pixel_period = 1000000 / (u32)mode->clock;
8666 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8668 /* watermark for high clocks */
8669 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8670 rdev->pm.dpm_enabled) {
8672 radeon_dpm_get_mclk(rdev, false) * 10;
8674 radeon_dpm_get_sclk(rdev, false) * 10;
8676 wm_high.yclk = rdev->pm.current_mclk * 10;
8677 wm_high.sclk = rdev->pm.current_sclk * 10;
8680 wm_high.disp_clk = mode->clock;
8681 wm_high.src_width = mode->crtc_hdisplay;
8682 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8683 wm_high.blank_time = line_time - wm_high.active_time;
8684 wm_high.interlaced = false;
8685 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8686 wm_high.interlaced = true;
8687 wm_high.vsc = radeon_crtc->vsc;
8689 if (radeon_crtc->rmx_type != RMX_OFF)
8691 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8692 wm_high.lb_size = lb_size;
8693 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8694 wm_high.num_heads = num_heads;
8696 /* set for high clocks */
8697 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8699 /* possibly force display priority to high */
8700 /* should really do this at mode validation time... */
8701 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8702 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8703 !dce8_check_latency_hiding(&wm_high) ||
8704 (rdev->disp_priority == 2)) {
8705 DRM_DEBUG_KMS("force priority to high\n");
8708 /* watermark for low clocks */
8709 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8710 rdev->pm.dpm_enabled) {
8712 radeon_dpm_get_mclk(rdev, true) * 10;
8714 radeon_dpm_get_sclk(rdev, true) * 10;
8716 wm_low.yclk = rdev->pm.current_mclk * 10;
8717 wm_low.sclk = rdev->pm.current_sclk * 10;
8720 wm_low.disp_clk = mode->clock;
8721 wm_low.src_width = mode->crtc_hdisplay;
8722 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8723 wm_low.blank_time = line_time - wm_low.active_time;
8724 wm_low.interlaced = false;
8725 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8726 wm_low.interlaced = true;
8727 wm_low.vsc = radeon_crtc->vsc;
8729 if (radeon_crtc->rmx_type != RMX_OFF)
8731 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8732 wm_low.lb_size = lb_size;
8733 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8734 wm_low.num_heads = num_heads;
8736 /* set for low clocks */
8737 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8739 /* possibly force display priority to high */
8740 /* should really do this at mode validation time... */
8741 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8742 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8743 !dce8_check_latency_hiding(&wm_low) ||
8744 (rdev->disp_priority == 2)) {
8745 DRM_DEBUG_KMS("force priority to high\n");
8750 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8752 tmp &= ~LATENCY_WATERMARK_MASK(3);
8753 tmp |= LATENCY_WATERMARK_MASK(1);
8754 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8755 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8756 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8757 LATENCY_HIGH_WATERMARK(line_time)));
8759 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8760 tmp &= ~LATENCY_WATERMARK_MASK(3);
8761 tmp |= LATENCY_WATERMARK_MASK(2);
8762 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8763 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8764 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8765 LATENCY_HIGH_WATERMARK(line_time)));
8766 /* restore original selection */
8767 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8769 /* save values for DPM */
8770 radeon_crtc->line_time = line_time;
8771 radeon_crtc->wm_high = latency_watermark_a;
8772 radeon_crtc->wm_low = latency_watermark_b;
8776 * dce8_bandwidth_update - program display watermarks
8778 * @rdev: radeon_device pointer
8780 * Calculate and program the display watermarks and line
8781 * buffer allocation (CIK).
8783 void dce8_bandwidth_update(struct radeon_device *rdev)
8785 struct drm_display_mode *mode = NULL;
8786 u32 num_heads = 0, lb_size;
8789 radeon_update_display_priority(rdev);
8791 for (i = 0; i < rdev->num_crtc; i++) {
8792 if (rdev->mode_info.crtcs[i]->base.enabled)
8795 for (i = 0; i < rdev->num_crtc; i++) {
8796 mode = &rdev->mode_info.crtcs[i]->base.mode;
8797 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8798 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8803 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8805 * @rdev: radeon_device pointer
8807 * Fetches a GPU clock counter snapshot (SI).
8808 * Returns the 64 bit clock counter snapshot.
8810 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8814 mutex_lock(&rdev->gpu_clock_mutex);
8815 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8816 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8817 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8818 mutex_unlock(&rdev->gpu_clock_mutex);
8822 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8823 u32 cntl_reg, u32 status_reg)
8826 struct atom_clock_dividers dividers;
8829 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8830 clock, false, ÷rs);
8834 tmp = RREG32_SMC(cntl_reg);
8835 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8836 tmp |= dividers.post_divider;
8837 WREG32_SMC(cntl_reg, tmp);
8839 for (i = 0; i < 100; i++) {
8840 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8850 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8854 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8858 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8862 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8864 struct pci_dev *root = rdev->pdev->bus->self;
8865 int bridge_pos, gpu_pos;
8866 u32 speed_cntl, mask, current_data_rate;
8870 if (radeon_pcie_gen2 == 0)
8873 if (rdev->flags & RADEON_IS_IGP)
8876 if (!(rdev->flags & RADEON_IS_PCIE))
8879 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8883 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8886 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8887 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8888 LC_CURRENT_DATA_RATE_SHIFT;
8889 if (mask & DRM_PCIE_SPEED_80) {
8890 if (current_data_rate == 2) {
8891 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8894 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8895 } else if (mask & DRM_PCIE_SPEED_50) {
8896 if (current_data_rate == 1) {
8897 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8900 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8903 bridge_pos = pci_pcie_cap(root);
8907 gpu_pos = pci_pcie_cap(rdev->pdev);
8911 if (mask & DRM_PCIE_SPEED_80) {
8912 /* re-try equalization if gen3 is not already enabled */
8913 if (current_data_rate != 2) {
8914 u16 bridge_cfg, gpu_cfg;
8915 u16 bridge_cfg2, gpu_cfg2;
8916 u32 max_lw, current_lw, tmp;
8918 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8919 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8921 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8922 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8924 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8925 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8927 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8928 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8929 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8931 if (current_lw < max_lw) {
8932 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8933 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8934 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8935 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8936 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8937 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8941 for (i = 0; i < 10; i++) {
8943 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8944 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8947 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8948 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8950 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8951 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8953 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8954 tmp |= LC_SET_QUIESCE;
8955 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8957 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8959 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8964 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8965 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8966 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8967 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8969 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8970 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8971 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8972 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8975 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8976 tmp16 &= ~((1 << 4) | (7 << 9));
8977 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8978 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8980 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8981 tmp16 &= ~((1 << 4) | (7 << 9));
8982 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8983 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8985 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8986 tmp &= ~LC_SET_QUIESCE;
8987 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8992 /* set the link speed */
8993 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8994 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8995 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8997 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8999 if (mask & DRM_PCIE_SPEED_80)
9000 tmp16 |= 3; /* gen3 */
9001 else if (mask & DRM_PCIE_SPEED_50)
9002 tmp16 |= 2; /* gen2 */
9004 tmp16 |= 1; /* gen1 */
9005 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9007 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9008 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9009 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9011 for (i = 0; i < rdev->usec_timeout; i++) {
9012 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9013 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9019 static void cik_program_aspm(struct radeon_device *rdev)
9022 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9023 bool disable_clkreq = false;
9025 if (radeon_aspm == 0)
9028 /* XXX double check IGPs */
9029 if (rdev->flags & RADEON_IS_IGP)
9032 if (!(rdev->flags & RADEON_IS_PCIE))
9035 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9036 data &= ~LC_XMIT_N_FTS_MASK;
9037 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9039 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9041 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9042 data |= LC_GO_TO_RECOVERY;
9044 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9046 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9047 data |= P_IGNORE_EDB_ERR;
9049 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9051 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9052 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9053 data |= LC_PMI_TO_L1_DIS;
9055 data |= LC_L0S_INACTIVITY(7);
9058 data |= LC_L1_INACTIVITY(7);
9059 data &= ~LC_PMI_TO_L1_DIS;
9061 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9063 if (!disable_plloff_in_l1) {
9064 bool clk_req_support;
9066 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9067 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9068 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9070 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9072 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9073 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9074 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9076 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9078 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9079 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9080 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9082 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9084 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9085 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9086 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9088 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9090 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9091 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9092 data |= LC_DYN_LANES_PWR_STATE(3);
9094 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9096 if (!disable_clkreq) {
9097 struct pci_dev *root = rdev->pdev->bus->self;
9100 clk_req_support = false;
9101 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9102 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9103 clk_req_support = true;
9105 clk_req_support = false;
9108 if (clk_req_support) {
9109 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9110 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9112 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9114 orig = data = RREG32_SMC(THM_CLK_CNTL);
9115 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9116 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9118 WREG32_SMC(THM_CLK_CNTL, data);
9120 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9121 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9122 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9124 WREG32_SMC(MISC_CLK_CTRL, data);
9126 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9127 data &= ~BCLK_AS_XCLK;
9129 WREG32_SMC(CG_CLKPIN_CNTL, data);
9131 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9132 data &= ~FORCE_BIF_REFCLK_EN;
9134 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9136 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9137 data &= ~MPLL_CLKOUT_SEL_MASK;
9138 data |= MPLL_CLKOUT_SEL(4);
9140 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9145 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9148 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9149 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9151 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9154 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9155 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9156 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9157 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9158 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9159 data &= ~LC_L0S_INACTIVITY_MASK;
9161 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);