From 89c98adf3ca7e672afee9dc9a7b5ffabcc4f17a1 Mon Sep 17 00:00:00 2001
From: Artur Mukhamadiev
Date: Tue, 7 Oct 2025 22:40:20 +0300
Subject: [PATCH] init

---
 .gitignore     |   4 ++
 CMakeLists.txt |  10 ++++
 conanfile.txt  |   5 ++
 src/bench.cc   | 168 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.cc    |  34 ++++++++++
 5 files changed, 221 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 CMakeLists.txt
 create mode 100644 conanfile.txt
 create mode 100644 src/bench.cc
 create mode 100644 src/main.cc

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1ec1b91
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+build
+CMakeUserPresets.json
+compile_commands.json
+.cache/
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..4e3e644
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,10 @@
+cmake_minimum_required(VERSION 3.5)
+project(vector-extension)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+add_executable(${PROJECT_NAME} src/bench.cc)
+
+find_package(benchmark REQUIRED)
+target_link_libraries(${PROJECT_NAME} PRIVATE benchmark::benchmark_main)
+target_compile_options(${PROJECT_NAME} PRIVATE -save-temps)
+target_compile_options(${PROJECT_NAME} PUBLIC -mavx -mavx2 -march=native -O2)
\ No newline at end of file
diff --git a/conanfile.txt b/conanfile.txt
new file mode 100644
index 0000000..f402c94
--- /dev/null
+++ b/conanfile.txt
@@ -0,0 +1,5 @@
+[requires]
+benchmark/1.9.4
+[generators]
+CMakeDeps
+CMakeToolchain
\ No newline at end of file
diff --git a/src/bench.cc b/src/bench.cc
new file mode 100644
index 0000000..65bdc63
--- /dev/null
+++ b/src/bench.cc
@@ -0,0 +1,168 @@
+// 1.2.1: Example of SSE2 intrinsics
+#include <array>
+#include <cstddef>
+// for int32_t / int64_t
+#include <cstdint>
+// for rand()
+#include <cstdlib>
+// for SSE2 intrinsics
+#include <emmintrin.h>
+// for AVX2 intrinsics
+#include <immintrin.h>
+
+#include <benchmark/benchmark.h>
+
+// NOTE(review): the element count used by the array benchmarks is an
+// assumption; keep it a multiple of 4 so whole SIMD registers are processed.
+constexpr std::size_t kArrayLen = 1024;
+
+// Adds two fixed 4 x int32_t arrays with a single SSE2 instruction.
+void vector(void)
+{
+    int32_t array_a[4] = { 0, 2, 1, 2 }; // 128 bit
+    int32_t array_b[4] = { 8, 5, 0, 6 };
+    int32_t array_c[4];
+    // Hide the constant inputs from the optimizer so the add is not folded away.
+    benchmark::DoNotOptimize(array_a);
+    benchmark::DoNotOptimize(array_b);
+    const __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(array_a));
+    const __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(array_b));
+    const __m128i c = _mm_add_epi32(a, b); // must be { 8,7,1,8 }
+    _mm_storeu_si128(reinterpret_cast<__m128i*>(array_c), c);
+    // Keep the otherwise unused result alive.
+    benchmark::DoNotOptimize(array_c);
+}
+
+static void BM_calculateVector(benchmark::State& state)
+{
+    for (auto _ : state)
+        vector();
+}
+
+BENCHMARK(BM_calculateVector);
+
+// Scalar counterpart of vector(): the same addition, one element at a time.
+void scalar()
+{
+    int32_t array_a[4] = { 0, 2, 1, 2 }; // 128 bit
+    int32_t array_b[4] = { 8, 5, 0, 6 };
+    int32_t array_c[4];
+    benchmark::DoNotOptimize(array_a);
+    benchmark::DoNotOptimize(array_b);
+    for (int i = 0; i < 4; i++)
+        array_c[i] = array_a[i] + array_b[i];
+    benchmark::DoNotOptimize(array_c);
+}
+
+static void BM_calculateScalar(benchmark::State& state)
+{
+    for (auto _ : state)
+        scalar();
+}
+
+BENCHMARK(BM_calculateScalar);
+
+// Returns an array of N pseudo-random values in [0, 100).
+template <typename T, std::size_t N>
+std::array<T, N> generateLongArray()
+{
+    std::array<T, N> arr;
+    for (auto& a : arr) {
+        a = rand() % 100;
+    }
+    return arr;
+}
+
+// Element-wise sum of two int32_t arrays, 4 lanes per step (SSE2).
+template <std::size_t N>
+std::array<int32_t, N> with_vector(std::array<int32_t, N>& arr, std::array<int32_t, N>& arr2)
+{
+    static_assert(N % 4 == 0, "N must be a multiple of 4 (4 x int32_t per __m128i)");
+    std::array<int32_t, N> result;
+    for (std::size_t i = 0; i < N; i += 4) {
+        const __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(arr.data() + i));
+        const __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(arr2.data() + i));
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(result.data() + i), _mm_add_epi32(a, b));
+    }
+    return result;
+}
+
+static void BM_arrayVector(benchmark::State& state)
+{
+    auto arr1 = generateLongArray<int32_t, kArrayLen>();
+    auto arr2 = generateLongArray<int32_t, kArrayLen>();
+
+    for (auto _ : state) {
+        auto arr3 = with_vector(arr1, arr2);
+        benchmark::DoNotOptimize(arr3);
+    }
+}
+
+BENCHMARK(BM_arrayVector);
+
+// Element-wise sum of two arrays, one element per step.
+template <typename T, std::size_t N>
+std::array<T, N> with_scalar(std::array<T, N>& arr, std::array<T, N>& arr2)
+{
+    std::array<T, N> result;
+    // Index all three arrays together so each sum lands in its own slot.
+    for (std::size_t i = 0; i < N; ++i) {
+        result[i] = arr[i] + arr2[i];
+    }
+    return result;
+}
+
+static void BM_arrayScalar(benchmark::State& state)
+{
+    auto arr1 = generateLongArray<int32_t, kArrayLen>();
+    auto arr2 = generateLongArray<int32_t, kArrayLen>();
+
+    for (auto _ : state) {
+        auto arr3 = with_scalar(arr1, arr2);
+        benchmark::DoNotOptimize(arr3);
+    }
+}
+
+BENCHMARK(BM_arrayScalar);
+
+// Element-wise sum of two int64_t arrays, 4 lanes per step (AVX2).
+template <std::size_t N>
+std::array<int64_t, N> with_vectorAVX2(std::array<int64_t, N>& arr, std::array<int64_t, N>& arr2)
+{
+    static_assert(N % 4 == 0, "N must be a multiple of 4 (4 x int64_t per __m256i)");
+    std::array<int64_t, N> result;
+    for (std::size_t i = 0; i < N; i += 4) {
+        // std::array storage is not guaranteed to be 32-byte aligned, so the
+        // unaligned load/store forms are required here.
+        const __m256i a = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(arr.data() + i));
+        const __m256i b = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(arr2.data() + i));
+        _mm256_storeu_si256(reinterpret_cast<__m256i*>(result.data() + i), _mm256_add_epi64(a, b));
+    }
+    return result;
+}
+
+static void BM_arrayAVX2Vector(benchmark::State& state)
+{
+    auto arr1 = generateLongArray<int64_t, kArrayLen>();
+    auto arr2 = generateLongArray<int64_t, kArrayLen>();
+    for (auto _ : state) {
+        auto arr3 = with_vectorAVX2(arr1, arr2);
+        benchmark::DoNotOptimize(arr3);
+    }
+}
+
+static void BM_arrayScalar64(benchmark::State& state)
+{
+    auto arr1 = generateLongArray<int64_t, kArrayLen>();
+    auto arr2 = generateLongArray<int64_t, kArrayLen>();
+
+    for (auto _ : state) {
+        auto arr3 = with_scalar(arr1, arr2);
+        benchmark::DoNotOptimize(arr3);
+    }
+}
+
+BENCHMARK(BM_arrayAVX2Vector);
+BENCHMARK(BM_arrayScalar64);
+
+BENCHMARK_MAIN();
\ No newline at end of file
diff --git a/src/main.cc b/src/main.cc
new file mode 100644
index 0000000..887c8c8
--- /dev/null
+++ b/src/main.cc
@@ -0,0 +1,34 @@
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <emmintrin.h>
+
+template <std::size_t N>
+void generateLongArray(std::array<int32_t, N>& arr)
+{
+    for (int32_t& a : arr) {
+        a = rand() % 100;
+    }
+}
+
+template <std::size_t N>
+std::array<int32_t, N> with_vector(std::array<int32_t, N>& arr, std::array<int32_t, N>& arr2)
+{
+    static_assert(N % 4 == 0, "N must be a multiple of 4 (4 x int32_t per __m128i)");
+    std::array<int32_t, N> result;
+    for (std::size_t i = 0; i < N; i += 4) {
+        const __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(arr.data() + i));
+        const __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(arr2.data() + i));
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(result.data() + i), _mm_add_epi32(a, b));
+    }
+    return result;
+}
+
+int main()
+{
+    std::array<int32_t, 1024> a;
+    generateLongArray(a);
+
+    return 0;
+}
\ No newline at end of file