Pileus Git - ~andy/linux/blobdiff - tools/perf/builtin-stat.c
perf stat: Print out miss/hit ratio for L1 data-cache events
[~andy/linux] / tools / perf / builtin-stat.c
index 03f0e45f1479c2fab31de2624d69d95458886157..03bac6aa014b679b9d46ba1221a80fc7e7eff34c 100644 (file)
@@ -46,6 +46,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/debug.h"
+#include "util/color.h"
 #include "util/header.h"
 #include "util/cpumap.h"
 #include "util/thread.h"
@@ -65,11 +66,10 @@ static struct perf_event_attr default_attrs[] = {
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS            },
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES             },
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES         },
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS           },
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS    },
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES          },
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES       },
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES           },
 
 };
 
@@ -156,7 +156,10 @@ static double stddev_stats(struct stats *stats)
 
 struct stats                   runtime_nsecs_stats[MAX_NR_CPUS];
 struct stats                   runtime_cycles_stats[MAX_NR_CPUS];
+struct stats                   runtime_stalled_cycles_stats[MAX_NR_CPUS];
 struct stats                   runtime_branches_stats[MAX_NR_CPUS];
+struct stats                   runtime_cacherefs_stats[MAX_NR_CPUS];
+struct stats                   runtime_l1_dcache_stats[MAX_NR_CPUS];
 struct stats                   walltime_nsecs_stats;
 
 static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -192,6 +195,27 @@ static inline int nsec_counter(struct perf_evsel *evsel)
        return 0;
 }
 
+/*
+ * Record count[0] in the shadow stat kept for this counter's event, if
+ * any; the printout code reads these stats back to derive ratios.
+ * NOTE(review): always slot [0]; read_counter() indexed by cpu - confirm.
+ */
+static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
+{
+       if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+               update_stats(&runtime_nsecs_stats[0], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+               update_stats(&runtime_cycles_stats[0], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES))
+               update_stats(&runtime_stalled_cycles_stats[0], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+               update_stats(&runtime_branches_stats[0], count[0]);
+       else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+               update_stats(&runtime_cacherefs_stats[0], count[0]);
+       else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+               update_stats(&runtime_l1_dcache_stats[0], count[0]);
+}
+
 /*
  * Read out the results of a single counter:
  * aggregate counts across CPUs in system-wide mode
@@ -217,12 +241,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
        /*
         * Save the full runtime - to allow normalization during printout:
         */
-       if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
-               update_stats(&runtime_nsecs_stats[0], count[0]);
-       if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-               update_stats(&runtime_cycles_stats[0], count[0]);
-       if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-               update_stats(&runtime_branches_stats[0], count[0]);
+       update_shadow_stats(counter, count);
 
        return 0;
 }
@@ -242,12 +261,7 @@ static int read_counter(struct perf_evsel *counter)
 
                count = counter->counts->cpu[cpu].values;
 
-               if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
-                       update_stats(&runtime_nsecs_stats[cpu], count[0]);
-               if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-                       update_stats(&runtime_cycles_stats[cpu], count[0]);
-               if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-                       update_stats(&runtime_branches_stats[cpu], count[0]);
+               update_shadow_stats(counter, count);
        }
 
        return 0;
@@ -372,6 +386,16 @@ static int run_perf_stat(int argc __used, const char **argv)
        return WEXITSTATUS(status);
 }
 
+/*
+ * Print total as a percentage of avg; a zero avg is printed as 0.00%.
+ */
+static void print_noise_pct(double total, double avg)
+{
+       double pct = avg ? 100.0 * total / avg : 0.0;
+
+       fprintf(stderr, "  ( +-%6.2f%% )", pct);
+}
+
 static void print_noise(struct perf_evsel *evsel, double avg)
 {
        struct perf_stat *ps;
@@ -380,8 +404,7 @@ static void print_noise(struct perf_evsel *evsel, double avg)
                return;
 
        ps = evsel->priv;
-       fprintf(stderr, "   ( +- %7.3f%% )",
-                       100 * stddev_stats(&ps->res_stats[0]) / avg);
+       print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 }
 
 static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
@@ -404,8 +427,76 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
                return;
 
        if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
-               fprintf(stderr, " # %10.3f CPUs ",
-                               avg / avg_stats(&walltime_nsecs_stats));
+               fprintf(stderr, " # %8.3f CPUs utilized          ", avg / avg_stats(&walltime_nsecs_stats));
+}
+
+/* Print avg as a colored percentage of the cycles shadow stat for cpu. */
+static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+       const char *color = PERF_COLOR_NORMAL;
+       double cycles = avg_stats(&runtime_cycles_stats[cpu]);
+       double pct = 0.0;
+
+       if (cycles)
+               pct = avg / cycles * 100.0;
+
+       /* Highlight larger ratios: >25% yellow, >50% magenta, >75% red. */
+       if (pct > 75.0)
+               color = PERF_COLOR_RED;
+       else if (pct > 50.0)
+               color = PERF_COLOR_MAGENTA;
+       else if (pct > 25.0)
+               color = PERF_COLOR_YELLOW;
+
+       fprintf(stderr, " #   ");
+       color_fprintf(stderr, color, "%5.2f%%", pct);
+       fprintf(stderr, " of all cycles are idle ");
+}
+
+/* Print avg as a colored percentage of the branches shadow stat for cpu. */
+static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+       const char *color = PERF_COLOR_NORMAL;
+       double branches = avg_stats(&runtime_branches_stats[cpu]);
+       double pct = 0.0;
+
+       if (branches)
+               pct = avg / branches * 100.0;
+
+       /* Highlight larger ratios: >5% yellow, >10% magenta, >20% red. */
+       if (pct > 20.0)
+               color = PERF_COLOR_RED;
+       else if (pct > 10.0)
+               color = PERF_COLOR_MAGENTA;
+       else if (pct > 5.0)
+               color = PERF_COLOR_YELLOW;
+
+       fprintf(stderr, " #   ");
+       color_fprintf(stderr, color, "%5.2f%%", pct);
+       fprintf(stderr, " of all branches        ");
+}
+
+/* Print avg as a colored percentage of the L1-dcache shadow stat for cpu. */
+static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+       const char *color = PERF_COLOR_NORMAL;
+       double l1d = avg_stats(&runtime_l1_dcache_stats[cpu]);
+       double pct = 0.0;
+
+       if (l1d)
+               pct = avg / l1d * 100.0;
+
+       /* Highlight larger ratios: >5% yellow, >10% magenta, >20% red. */
+       if (pct > 20.0)
+               color = PERF_COLOR_RED;
+       else if (pct > 10.0)
+               color = PERF_COLOR_MAGENTA;
+       else if (pct > 5.0)
+               color = PERF_COLOR_YELLOW;
+
+       fprintf(stderr, " #   ");
+       color_fprintf(stderr, color, "%5.2f%%", pct);
+       fprintf(stderr, " of all L1-dcache hits  ");
 }
 
 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
@@ -442,23 +533,52 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
                if (total)
                        ratio = avg / total;
 
-               fprintf(stderr, " # %10.3f IPC  ", ratio);
+               fprintf(stderr, " #    %4.2f  insns per cycle", ratio);
+
+               total = avg_stats(&runtime_stalled_cycles_stats[cpu]);
+
+               if (total && avg) {
+                       ratio = total / avg;
+                       fprintf(stderr, "\n                                            #    %4.2f  stalled cycles per insn", ratio);
+               }
+
        } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
                        runtime_branches_stats[cpu].n != 0) {
-               total = avg_stats(&runtime_branches_stats[cpu]);
+               print_branch_misses(cpu, evsel, avg);
+       } else if (
+               evsel->attr.type == PERF_TYPE_HW_CACHE &&
+               evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
+                                       ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+                                       ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+                       runtime_branches_stats[cpu].n != 0) {
+               print_l1_dcache_misses(cpu, evsel, avg);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
+                       runtime_cacherefs_stats[cpu].n != 0) {
+               total = avg_stats(&runtime_cacherefs_stats[cpu]);
 
                if (total)
                        ratio = avg * 100 / total;
 
-               fprintf(stderr, " # %10.3f %%    ", ratio);
+               fprintf(stderr, " # %8.3f %% of all cache refs    ", ratio);
+
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
+               print_stalled_cycles(cpu, evsel, avg);
+       } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+               total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+               if (total)
+                       ratio = 1.0 * avg / total;
 
+               fprintf(stderr, " # %8.3f GHz                    ", ratio);
        } else if (runtime_nsecs_stats[cpu].n != 0) {
                total = avg_stats(&runtime_nsecs_stats[cpu]);
 
                if (total)
                        ratio = 1000.0 * avg / total;
 
-               fprintf(stderr, " # %10.3f M/sec", ratio);
+               fprintf(stderr, " # %8.3f M/sec                  ", ratio);
+       } else {
+               fprintf(stderr, "                                   ");
        }
 }
 
@@ -595,9 +715,8 @@ static void print_stat(int argc, const char **argv)
                fprintf(stderr, " %18.9f  seconds time elapsed",
                                avg_stats(&walltime_nsecs_stats)/1e9);
                if (run_count > 1) {
-                       fprintf(stderr, "   ( +- %7.3f%% )",
-                               100*stddev_stats(&walltime_nsecs_stats) /
-                               avg_stats(&walltime_nsecs_stats));
+                       print_noise_pct(stddev_stats(&walltime_nsecs_stats),
+                                       avg_stats(&walltime_nsecs_stats));
                }
                fprintf(stderr, "\n\n");
        }