|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
在测试 batch_process_avx2 函数时发现数据发生了混乱:测试案例中只落了两个黑子,所有白子的数据本应保持为 0,但在函数内部却被改乱了。
我想知道为什么会发生数据混乱的问题,大致方向,我好进行排查
在打印的案例中,第一轮处理前白子的所有数据均为 0,属于正常;处理后第 4、第 5 列出现异常,变成了 1,而期望值应该是 0。白棋分支内部的打印并未触发,索引也已经打印出来。
- this line 147 Running test_get_move function ...
- [DEBUG] GET_SORT_MOVES line 483 data number is :16 success !
- +++++ [debug] batch_process_avx2 line 252 simd_white[1,2,5]++++++:
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- |-|-|-|-| [debug] batch_process_avx2 line 250 player= 1 and BLACK_PLAYER= 1 <k:j> <1:0> | <1:1> | <1:2> | <1:3> | <1:4> | <1:5> | <1:6> | <1:7> |
- --- [debug] batch_process_avx2 line 252 simd_white[1,2,5]++++++:
- :572 :0 :0 :0 :1 :0
- :572 :0 :0 :0 :1 :0
- :572 :0 :0 :0 :1 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :1
- [DEBUG] batch 0 Rating: -632 -632 -632 368 368 368 368 -9632
- +++++ [debug] batch_process_avx2 line 252 simd_white[1,2,5]++++++:
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- |-|-|-|-| [debug] batch_process_avx2 line 250 player= 1 and BLACK_PLAYER= 1 <k:j> <2:0> | <5:1> | <5:2> | <1:3> | <1:4> | <5:5> | <5:6> | <2:7> |
- --- [debug] batch_process_avx2 line 252 simd_white[1,2,5]++++++:
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- :572 :0 :0 :0 :0 :0
- [DEBUG] batch 8 Rating: 377 10367 10367 368 368 10367 10367 377
- this line 147-174 Running function test_get_move... success Passed!
复制代码
GET_SORT_MOVES函数
- CORE_API void GET_SORT_MOVES(AlignedSIMDContext* ctx, int player, int top_n) noexcept {
- if (!ctx || ctx->undo_top >= 255) {
- ctx->num_sorted_moves = 0;
- return;
- }
- // 生成候选着法
- std::vector<std::pair<int, int>> history;
- history.reserve(ctx->undo_top);
- for (int i = 0; i < ctx->undo_top; ++i) {
- history.emplace_back(ctx->undo_stack[i].x, ctx->undo_stack[i].y);
- }
- auto candidates = generate_candidates(history);
- ctx->num_sorted_moves = 0;
- if (candidates.empty()) return;
- std::vector<SIMDContext> thread_ctxs(omp_get_max_threads());
- std::vector<int> scores(candidates.size());
- std::vector<Move> temp_moves;
- std::cout << "[DEBUG] GET_SORT_MOVES line 483 data number is :" << candidates.size() << " success ! \n ";
- #pragma omp parallel
- {
- const int tid = omp_get_thread_num();
- #pragma omp for schedule(static, 64)
- for (int i = 0; i < candidates.size(); i += SIMD_BATCH_SIZE) {
- copy_core_data(&thread_ctxs[tid], ctx);
- const int batch = std::min(SIMD_BATCH_SIZE, int(candidates.size() - i));
- batch_process_avx2(
- &thread_ctxs[tid],
- {candidates.begin() + i, candidates.begin() + i + batch},
- player,
- scores.data() + i
- );
- std::cout << "\n[DEBUG] batch " << i << " Rating: ";
- for (int j = 0; j < batch; ++j) {
- std::cout << scores[i + j] << " ";
- }
- std::cout << std::endl << std::flush;
- }
- }
- // 主线程处理排序
- temp_moves.reserve(candidates.size());
- for (size_t i = 0; i < candidates.size(); ++i) {
- temp_moves.push_back({ scores[i], { candidates[i].first, candidates[i].second } });
- }
- const int partial_sort_size = std::min(static_cast<int>(temp_moves.size()), top_n * 2);
- std::partial_sort(
- temp_moves.begin(),
- temp_moves.begin() + partial_sort_size,
- temp_moves.end(),
- [](const Move& a, const Move& b) { return a.score > b.score; }
- );
- ctx->num_sorted_moves = std::min(top_n, static_cast<int>(temp_moves.size()));
- for (int i = 0; i < ctx->num_sorted_moves; ++i) {
- if (i >= MAX_SORTED_MOVES) break;
- ctx->sorted_moves[i] = temp_moves[i];
- }
- }
- }
复制代码
copy_core_data函数
- inline void copy_core_data(
- SIMDContext* dst,
- const AlignedSIMDContext* src
- ) {
- static_assert(sizeof(dst->vec_states) == sizeof(src->vec_states),
- "VectorState 大小不匹配");
- memcpy(dst->vec_states, src->vec_states, sizeof(VectorState)*MAX_VECTORS);
- for (int k = 0; k < 6; ++k) {
- for (int j = 0; j < SIMD_WIDTH; ++j) {
- dst->simd_black[k][j] = src->black_counts[k];
- dst->simd_white[k][j] = src->white_counts[k];
- }
- }
- }
复制代码
batch_process_avx2函数
- void batch_process_avx2(
- SIMDContext* ctx,
- const std::vector<std::pair<int, int>>& moves,
- int player,
- int* scores)
- {
- alignas(32) int indices[SIMD_WIDTH];
- for (size_t i = 0; i < moves.size(); i += SIMD_WIDTH) {
- printf("\n+++++ [debug] batch_process_avx2 line 252 simd_white[1,2,5]++++++: ");
- for (int j = 0; j < 8; ++j) {
- printf("\n");
- for (int k = 0; k < 6; ++k) {
- printf(" :%d", ctx->simd_white[k][j]);
- }
- }
- const int batch = std::min(SIMD_WIDTH, static_cast<int>(moves.size() - i));
- // 填充索引数组
- for (int j = 0; j < batch; ++j) {
- const auto& [x, y] = moves[i + j];
- indices[j] = y * BOARD_SIZE + x;
- }
- for (int j = batch; j < SIMD_WIDTH; ++j) {
- indices[j] = 0;
- }
- __m256i v_idx = _mm256_load_si256(reinterpret_cast<__m256i*>(indices));
- // 根据玩家更新计数器
- __m256i v_counts;
- alignas(32) int new_counts[SIMD_WIDTH];
- alignas(32) int stored_indices[SIMD_WIDTH];
- _mm256_store_si256(reinterpret_cast<__m256i*>(stored_indices), v_idx);
- if (player == BLACK_PLAYER) {
- v_counts = _mm256_i32gather_epi32(ctx->black_counts, v_idx, sizeof(int32_t));
- v_counts = _mm256_add_epi32(v_counts, _mm256_set1_epi32(1));
- _mm256_store_si256(reinterpret_cast<__m256i*>(new_counts), v_counts);
- for (int j = 0; j < batch; ++j) {
- ctx->black_counts[stored_indices[j]] = new_counts[j];
- }
- } else {
- v_counts = _mm256_i32gather_epi32(ctx->white_counts, v_idx, sizeof(int32_t));
- v_counts = _mm256_add_epi32(v_counts, _mm256_set1_epi32(1));
- _mm256_store_si256(reinterpret_cast<__m256i*>(new_counts), v_counts);
- for (int j = 0; j < batch; ++j) {
- ctx->white_counts[stored_indices[j]] = new_counts[j];
- }
- }
- // 更新SIMD计数器
- printf("\n|-|-|-|-| [debug] batch_process_avx2 line 250 player= %d and BLACK_PLAYER= %d <k:j> ",player,BLACK_PLAYER);
- for (int m = 0; m < batch; ++m) {
- const int k = std::clamp(new_counts[m], 0, 5);
- printf("<%d:%d> | ",k ,m);
- if (player == BLACK_PLAYER) {
- ctx->simd_black[k][m]++;
- } else {
- printf("<> | <> | <> | <> | <> | <> | <> | <> | <> | <> | <> | <> | ");
- ctx->simd_white[k][m]++;
- }
- }
- __m256i v_scores = calculate_scores_avx2(ctx, player);
- printf("\n--- [debug] batch_process_avx2 line 252 simd_white[1,2,5]++++++: ");
- for (int j = 0; j < 8; ++j) {
- printf("\n");
- for (int k = 0; k < 6; ++k) {
- printf(" :%d", ctx->simd_white[k][j]);
- }
- }
- alignas(32) int score_values[SIMD_WIDTH];
- _mm256_store_si256(reinterpret_cast<__m256i*>(score_values), v_scores);
- for (int j = 0; j < batch; ++j) {
- scores[i + j] = score_values[j];
- }
- }
- }
复制代码
两个数据体结构
- struct alignas(64) SIMDContext {
- VectorState vec_states[MAX_VECTORS];
- alignas(32) int32_t black_counts[6];
- alignas(32) int32_t white_counts[6];
- // 核心状态(32字节对齐)
- alignas(32) int32_t simd_black[6][8];
- alignas(32) int32_t simd_white[6][8];
- };
- class CORE_API alignas(SIMD_ALIGNMENT) AlignedSIMDContext {
- public:
- Vector vectors[MAX_VECTORS];
- VectorState vec_states[MAX_VECTORS];
- Move sorted_moves[MAX_CELLS];
- int32_t num_sorted_moves = 0;
- alignas(32) int32_t black_counts[6];
- alignas(32) int32_t white_counts[6];
复制代码 |
|