CPU Performance in Java.
-
Upload
dzmitry-hil -
Category
Software
-
view
95 -
download
0
Transcript of CPU Performance in Java.
CPU Performance in Java.
Перед началом оптимизации
● а это нужно вообще?
● метрики
● качественные бенчмарки
● корректность измерений
● узкое место
Откуда ноги растут?
● плохая архитектура
● проблемы с потоками
● неэффективные алгоритмы
● неразумные структуры данных
● неаккуратная работа с памятью
● потери с I/O и работой с сетью
● ...
Задача: ускорить код
int N = 8192;
byte[][] arr = new byte[N][N];
/**
 * Reports whether the top-left {@code N x N} region of {@code arr} contains a negative byte.
 *
 * <p>The inner loop advances the first index, so consecutive reads come from different
 * row arrays. This traversal order is intentional: it is the baseline being measured.
 *
 * @param arr matrix to scan; needs at least {@code N} rows of at least {@code N} bytes
 * @param N   number of rows and columns to scan
 * @return {@code true} if at least one scanned element is negative
 */
static boolean check(byte[][] arr, int N) {
    int tally = 0;
    for (int col = 0; col < N; col++) {
        for (int row = 0; row < N; row++) {
            if (arr[row][col] < 0) {
                tally--;
            }
        }
    }
    return tally < 0;
}
3_744 ms
Нужно разобраться с устройством CPU
Если проще
CPU Cache
Вариант 1! Уже хорошо.
int N = 8192;
byte[][] arr = new byte[N][N];
/**
 * Reports whether the top-left {@code N x N} region of {@code arr} contains a negative byte.
 *
 * <p>The inner loop advances the second index, so each row array is read from front to
 * back before the next row is touched.
 *
 * @param arr matrix to scan; needs at least {@code N} rows of at least {@code N} bytes
 * @param N   number of rows and columns to scan
 * @return {@code true} if at least one scanned element is negative
 */
static boolean check(byte[][] arr, int N) {
    int tally = 0;
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (arr[row][col] < 0) {
                tally--;
            }
        }
    }
    return tally < 0;
}
264 ms
Вариант 2! Отлично.
int N = 8192;
byte[][] arr = new byte[N][N];
/**
 * Reports whether the top-left {@code N x N} region of {@code arr} contains a negative byte.
 *
 * <p>Rows are read front to back, and the per-element test is branch-free: a byte widened
 * to {@code int} and shifted right by 7 is {@code -1} for a negative value and {@code 0}
 * otherwise, so adding it decrements the tally exactly once per negative element.
 *
 * @param arr matrix to scan; needs at least {@code N} rows of at least {@code N} bytes
 * @param N   number of rows and columns to scan
 * @return {@code true} if at least one scanned element is negative
 */
static boolean check(byte[][] arr, int N) {
    int tally = 0;
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            tally += arr[row][col] >> 7;
        }
    }
    return tally < 0;
}
214 ms
Итого: ускорение в 3744 / 214 ≈ 17.5 раза (результат зависит от CPU)
Нужно всё распоточить!
Работает медленно.
/**
 * Benchmark worker that increments one element of the shared {@code arr} array a fixed
 * number of times.
 *
 * <p>NOTE(review): {@code arr} is not declared in this class; presumably it is the static
 * array of the benchmark driver class, which would require this class to be nested inside
 * it in order to compile — confirm against the full sources.
 */
public class IterationThread implements Runnable {
    private int index;
    private long iterations;

    /**
     * @param iterations number of increments performed by {@link #run()}
     * @param index      position in {@code arr} that this worker increments
     */
    public IterationThread(long iterations, int index) {
        this.iterations = iterations;
        this.index = index;
    }

    /** Increments {@code arr[index]} once per iteration. */
    @Override
    public void run() {
        long done = 0;
        while (done < iterations) {
            ++arr[index];
            ++done;
        }
    }
}
/**
 * Benchmark driver: runs one {@code IterationThread} per available processor and prints
 * the elapsed wall-clock time in milliseconds.
 *
 * <p>Worker {@code i} is given index {@code i}, so the workers are pointed at adjacent
 * {@code long} elements of the same array.
 */
public class FalseSharing {
    // `volatile` applies to the array reference, not to the individual elements.
    private static volatile long[] arr = new long[512];

    private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors();
    private static final long ITERATIONS = 2_000_000_000L;

    /**
     * Creates the workers, starts them, waits for all of them and prints {@code "time <ms>"}.
     *
     * @param args ignored
     * @throws Throwable if waiting for a worker thread fails
     */
    public static void main(String[] args) throws Throwable {
        Thread[] workers = new Thread[THREAD_COUNT];
        int slot = 0;
        while (slot < THREAD_COUNT) {
            workers[slot] = new Thread(new IterationThread(ITERATIONS, slot));
            slot++;
        }

        // The clock starts only after every Thread object exists, so construction is not timed.
        long startedAt = System.currentTimeMillis();
        for (Thread worker : workers) {
            worker.start();
        }
        for (Thread worker : workers) {
            worker.join();
        }
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println("time " + elapsedMillis);
    }
}
25_406 ms
Не все так просто. False sharing.
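False sharing — это ситуация, когда потоки обращаются к разным переменным, которые попали в один и тот же блок кэш-памяти (кэш-линию): запись одного потока инвалидирует эту линию в кэшах остальных ядер, хотя логически данные независимы.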
Работает хорошо.
/**
 * Benchmark worker that increments one element of the shared {@code arr} array a fixed
 * number of times.
 *
 * <p>NOTE(review): {@code arr} is not declared in this class; presumably it is the static
 * array of the benchmark driver class, which would require this class to be nested inside
 * it in order to compile — confirm against the full sources.
 */
public class IterationThread implements Runnable {
    private int index;
    private long iterations;

    /**
     * @param iterations number of increments performed by {@link #run()}
     * @param index      position in {@code arr} that this worker increments
     */
    public IterationThread(long iterations, int index) {
        this.iterations = iterations;
        this.index = index;
    }

    /** Increments {@code arr[index]} once per iteration. */
    @Override
    public void run() {
        long remaining = iterations;
        // Counts down instead of up; the body still runs exactly `iterations` times
        // (and not at all when `iterations` is zero or negative).
        while (remaining > 0) {
            ++arr[index];
            --remaining;
        }
    }
}
/**
 * Benchmark driver: runs one {@code IterationThread} per available processor and prints
 * the elapsed wall-clock time in milliseconds.
 *
 * <p>Worker {@code i} is given index {@code (i + 1) * STRIDE}, so the elements assigned to
 * different workers are {@code STRIDE} longs (64 bytes) apart instead of adjacent.
 */
public class TrueSharing {
    private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors();
    private static final long ITERATIONS = 2_000_000_000L;

    /** Distance, in array elements, between the slots of two neighbouring workers. */
    private static final int STRIDE = 8;

    // The highest index handed out is THREAD_COUNT * STRIDE. A fixed length of 512 is
    // overrun (ArrayIndexOutOfBoundsException inside the worker) on machines that report
    // 64 or more processors, so the array is sized from THREAD_COUNT. 512 stays the
    // minimum, so smaller machines allocate exactly what they did before.
    // `volatile` applies to the array reference, not to the individual elements.
    private static volatile long arr[] = new long[Math.max(512, (THREAD_COUNT + 1) * STRIDE)];

    /**
     * Creates the workers, starts them, waits for all of them and prints {@code "time <ms>"}.
     *
     * @param args ignored
     * @throws Throwable if waiting for a worker thread fails
     */
    public static void main(String[] args) throws Throwable {
        Thread[] threads = new Thread[THREAD_COUNT];
        for (int i = 0; i < THREAD_COUNT; ++i) {
            threads[i] = new Thread(new IterationThread(ITERATIONS, (i + 1) * STRIDE));
        }

        // The clock starts only after every Thread object exists, so construction is not timed.
        long start = System.currentTimeMillis();
        for (Thread t : threads) {
            t.start();
        }
        for (Thread t : threads) {
            t.join();
        }
        System.out.println("time " + (System.currentTimeMillis() - start));
    }
}
4_949 ms
А что, если нужно использовать свой собственный класс?
False sharing with custom objectpublic static class IterationThread implements Runnable { private int index; private long iterations;
public IterationThread(long iterations, int index) { this.index = index; this.iterations = iterations; }
@Override public void run() { for(long l = 0; l < iterations; ++l) { ++arr[index].val; } }}
/**
 * Benchmark driver: allocates one {@code MyObject} per available processor, runs one
 * {@code IterationThread} per object and prints the elapsed wall-clock time in milliseconds.
 *
 * <p>Worker {@code i} is given index {@code i} into {@code arr}.
 */
public class FalseSharing {
    private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors();
    private static final long ITERATIONS = 2_000_000_000L;
    private static MyObject[] arr = new MyObject[THREAD_COUNT];

    // Fill the array up front, back to back, before any worker thread is created.
    static {
        int slot = 0;
        while (slot < arr.length) {
            arr[slot] = new MyObject();
            slot++;
        }
    }

    /**
     * Creates the workers, starts them, waits for all of them and prints {@code "time <ms>"}.
     *
     * @param args ignored
     * @throws Throwable if waiting for a worker thread fails
     */
    public static void main(String[] args) throws Throwable {
        Thread[] workers = new Thread[THREAD_COUNT];
        int slot = 0;
        while (slot < THREAD_COUNT) {
            workers[slot] = new Thread(new IterationThread(ITERATIONS, slot));
            slot++;
        }

        // The clock starts only after every Thread object exists, so construction is not timed.
        long startedAt = System.currentTimeMillis();
        for (Thread worker : workers) {
            worker.start();
        }
        for (Thread worker : workers) {
            worker.join();
        }
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println("time " + elapsedMillis);
    }
}
149_743 ms
/** Counter holder used by the benchmark: a single public volatile {@code long}, initially zero. */
public static class MyObject {
    public volatile long val = 0L;
}
Java 7 Padding.public static class IterationThread implements Runnable { private int index; private long iterations;
public IterationThread(long iterations, int index) { this.index = index; this.iterations = iterations; }
@Override public void run() { for(long l = 0; l < iterations; ++l) { arr[index].incrementAndGet(); } }}
/**
 * Benchmark driver: allocates one {@code MyObject} per available processor, runs one
 * {@code IterationThread} per object and prints the elapsed wall-clock time in milliseconds.
 *
 * <p>Worker {@code i} is given index {@code i} into {@code arr}.
 */
public class FalseSharing {
    private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors();
    private static final long ITERATIONS = 2_000_000_000L;
    private static MyObject[] arr = new MyObject[THREAD_COUNT];

    // Fill the array up front, before any worker thread is created.
    static {
        int slot = 0;
        while (slot < arr.length) {
            arr[slot] = new MyObject();
            slot++;
        }
    }

    /**
     * Creates the workers, starts them, waits for all of them and prints {@code "time <ms>"}.
     *
     * @param args ignored
     * @throws Throwable if waiting for a worker thread fails
     */
    public static void main(String[] args) throws Throwable {
        Thread[] workers = new Thread[THREAD_COUNT];
        int slot = 0;
        while (slot < THREAD_COUNT) {
            workers[slot] = new Thread(new IterationThread(ITERATIONS, slot));
            slot++;
        }

        // The clock starts only after every Thread object exists, so construction is not timed.
        long startedAt = System.currentTimeMillis();
        for (Thread worker : workers) {
            worker.start();
        }
        for (Thread worker : workers) {
            worker.join();
        }
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println("time " + elapsedMillis);
    }
}
14_539 ms
/**
 * Counter used by the benchmark: an {@code AtomicLong} with six extra {@code long} fields.
 *
 * <p>Nothing in the visible code reads or writes {@code p1}..{@code p6}; they only make each
 * instance larger. NOTE(review): the intent is presumably to keep counters of neighbouring
 * instances in different cache lines, but the actual field layout and object size are
 * decided by the JVM — confirm on the target runtime.
 */
// Only p6 carries the `= 7L` initializer; p1..p5 keep the default value 0.
public static class MyObject extends AtomicLong { public volatile long p1, p2, p3, p4, p5, p6 = 7L;}
Java 8. @sun.misc.Contendedpublic static class IterationThread implements Runnable { private int index; private long iterations;
public IterationThread(long iterations, int index) { this.index = index; this.iterations = iterations; }
@Override public void run() { for(long l = 0; l < iterations; ++l) { arr[index].incrementAndGet(); } }}
/**
 * Benchmark driver: allocates one {@code MyObject} per available processor, runs one
 * {@code IterationThread} per object and prints the elapsed wall-clock time in milliseconds.
 *
 * <p>Worker {@code i} is given index {@code i} into {@code arr}.
 */
public class FalseSharing {
    private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors();
    private static final long ITERATIONS = 2_000_000_000L;
    private static MyObject[] arr = new MyObject[THREAD_COUNT];

    // Fill the array up front, before any worker thread is created.
    static {
        int slot = 0;
        while (slot < arr.length) {
            arr[slot] = new MyObject();
            slot++;
        }
    }

    /**
     * Creates the workers, starts them, waits for all of them and prints {@code "time <ms>"}.
     *
     * @param args ignored
     * @throws Throwable if waiting for a worker thread fails
     */
    public static void main(String[] args) throws Throwable {
        Thread[] workers = new Thread[THREAD_COUNT];
        int slot = 0;
        while (slot < THREAD_COUNT) {
            workers[slot] = new Thread(new IterationThread(ITERATIONS, slot));
            slot++;
        }

        // The clock starts only after every Thread object exists, so construction is not timed.
        long startedAt = System.currentTimeMillis();
        for (Thread worker : workers) {
            worker.start();
        }
        for (Thread worker : workers) {
            worker.join();
        }
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println("time " + elapsedMillis);
    }
}
14_983 ms
// unlock JVM option: -XX:-RestrictContended
@Contended
public static class MyObject extends AtomicLong { public volatile long anyVal;}
Спасибо за внимание