数据混乱问题,C\C++交流,编程语言专区,鱼C论坛

Stubborn 发表于 2025-3-10 20:58:17

数据混乱问题

在测试batch_process_avx2函数中发现，数据发生混乱，测试案例落了两个黑子，所有的白子数据正常都应该为0才对，但是在函数中发生了混乱。
我想知道为什么会发生数据混乱的问题，大致方向，我好进行排查
在打印结果中，第一轮在未处理前，白子所有数据均为 0，正常。处理后下标为 4 和 5 的两列出现异常，变成了 1，期望值应该是 0。白棋分支内部的打印并未触发，索引也已经打印。

this line 147 Running test_get_move function ...
GET_SORT_MOVES line 483 data number is :16 success !

+++++ batch_process_avx2 line 252 simd_white++++++:
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
|-|-|-|-| batch_process_avx2 line 250 player= 1 andBLACK_PLAYER= 1 <k:j> <1:0> | <1:1> | <1:2> | <1:3> | <1:4> | <1:5> | <1:6> | <1:7> |
--- batch_process_avx2 line 252 simd_white++++++:
:572 :0 :0 :0 :1 :0
:572 :0 :0 :0 :1 :0
:572 :0 :0 :0 :1 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :1
batch 0 Rating: -632 -632 -632 368 368 368 368 -9632

+++++ batch_process_avx2 line 252 simd_white++++++:
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
|-|-|-|-| batch_process_avx2 line 250 player= 1 andBLACK_PLAYER= 1 <k:j> <2:0> | <5:1> | <5:2> | <1:3> | <1:4> | <5:5> | <5:6> | <2:7> |
--- batch_process_avx2 line 252 simd_white++++++:
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
:572 :0 :0 :0 :0 :0
batch 8 Rating: 377 10367 10367 368 368 10367 10367 377
this line 147-174 Running function test_get_move... success Passed!

GET_SORT_MOVES函数
/// Generates candidate moves from the move history, scores them in SIMD batches
/// (one scratch SIMDContext per OpenMP thread) and stores the best ones in
/// ctx->sorted_moves, ordered by descending score.
///
/// @param ctx    Search context: its undo stack is read, and sorted_moves /
///               num_sorted_moves are written. A null pointer is ignored.
/// @param player Side to score for; forwarded unchanged to batch_process_avx2.
/// @param top_n  Maximum number of moves to keep. Non-positive values keep none;
///               the result is also capped at MAX_SORTED_MOVES.
CORE_API void GET_SORT_MOVES(AlignedSIMDContext* ctx, int player, int top_n) noexcept {
   // A null context cannot be written to, so leave before touching it.
   if (!ctx) return;

   ctx->num_sorted_moves = 0;
   if (ctx->undo_top >= 255) return;

   // Rebuild the (x, y) history from the undo stack to seed candidate generation.
   std::vector<std::pair<int, int>> history;
   if (ctx->undo_top > 0) {
         history.reserve(static_cast<size_t>(ctx->undo_top));
   }
   for (int i = 0; i < ctx->undo_top; ++i) {
         history.emplace_back(ctx->undo_stack[i].x, ctx->undo_stack[i].y);
   }

   const auto candidates = generate_candidates(history);
   if (candidates.empty()) return;

   const int total = static_cast<int>(candidates.size());
   std::vector<SIMDContext> thread_ctxs(omp_get_max_threads());
   std::vector<int> scores(candidates.size());
   std::cout << " GET_SORT_MOVES line 483 data number is :" << candidates.size() << " success ! \n ";

#pragma omp parallel
   {
         // Each thread works on its own scratch context so batches never share state.
         const int tid = omp_get_thread_num();
         SIMDContext* const local_ctx = &thread_ctxs[tid];

#pragma omp for schedule(static, 64)
         for (int i = 0; i < total; i += SIMD_BATCH_SIZE) {
            copy_core_data(local_ctx, ctx);
            const int batch = std::min(SIMD_BATCH_SIZE, total - i);
            batch_process_avx2(
               local_ctx,
               {candidates.begin() + i, candidates.begin() + i + batch},
               player,
               scores.data() + i
            );
            // Debug trace of the scores produced for this batch.
            std::cout << "\n batch " << i << " Rating: ";
            for (int j = 0; j < batch; ++j) {
               std::cout << scores[i + j] << " ";
            }
            std::cout << std::endl;
         }
   }

   // Ranking is done on the calling thread once every batch has been scored.
   std::vector<Move> temp_moves;
   temp_moves.reserve(candidates.size());
   for (size_t i = 0; i < candidates.size(); ++i) {
         temp_moves.push_back({ scores[i], { candidates[i].first, candidates[i].second } });
   }

   const int available = static_cast<int>(temp_moves.size());
   const int wanted = std::max(top_n, 0);

   // Same span as min(available, top_n * 2), computed without overflowing and
   // never negative, so the iterator passed to partial_sort is always valid.
   const int partial_sort_size = (wanted > available / 2) ? available : wanted * 2;
   std::partial_sort(
         temp_moves.begin(),
         temp_moves.begin() + partial_sort_size,
         temp_moves.end(),
         [](const Move& a, const Move& b) { return a.score > b.score; }
   );

   // Report exactly as many moves as are copied below, so a caller iterating
   // num_sorted_moves entries never reads a slot that was not written.
   const int keep = std::min(std::min(wanted, available), static_cast<int>(MAX_SORTED_MOVES));
   for (int i = 0; i < keep; ++i) {
         ctx->sorted_moves[i] = temp_moves[i];
   }
   ctx->num_sorted_moves = keep;
}

}

copy_core_data函数
// Seeds a per-thread SIMDContext from the shared AlignedSIMDContext.
//
// dst: scratch context to overwrite (vec_states, simd_black, simd_white).
// src: source context; only read.
//
// This function does not write dst->black_counts or dst->white_counts, so those
// members keep whatever the scratch context held before the call.
inline void copy_core_data(
   SIMDContext* dst,
   const AlignedSIMDContext* src
) {
   // Both contexts must declare vec_states with the same total size for the raw copy below.
   static_assert(sizeof(dst->vec_states) == sizeof(src->vec_states),
                  "VectorState 大小不匹配");
   memcpy(dst->vec_states, src->vec_states, sizeof(VectorState)*MAX_VECTORS);

   // Walks 6 rows (k) by SIMD_WIDTH lanes (j).
   // NOTE(review): as shown, neither side of these assignments is subscripted by k or j;
   // the index expressions look like they were lost from the listing. Confirm against the
   // real source which element of src->black_counts / src->white_counts feeds
   // dst->simd_black[k][j] / dst->simd_white[k][j].
   for (int k = 0; k < 6; ++k) {
         for (int j = 0; j < SIMD_WIDTH; ++j) {
            dst->simd_black = src->black_counts;
            dst->simd_white = src->white_counts;
         }
   }
}

batch_process_avx2函数
/// Scores candidate moves with AVX2, SIMD_WIDTH lanes at a time.
///
/// For every candidate (x, y) the cell index y * BOARD_SIZE + x selects the mover's
/// per-cell counter in `ctx`; that counter is incremented and written back, the
/// matching simd_black / simd_white bucket (counter clamped to 0..5) is incremented
/// for the lane, and calculate_scores_avx2() produces the lane scores.
///
/// A candidate whose cell index falls outside the counter array is reported on
/// stderr and its counter updates are skipped. Without this check the write-back
/// lands in whatever member follows the counter array inside SIMDContext.
///
/// NOTE(review): counter updates are written into `ctx` and never undone here, so
/// they accumulate across calls unless the caller resets the counters — confirm
/// that this is intended.
///
/// @param ctx    Per-thread scratch context; modified in place.
/// @param moves  Candidate (x, y) pairs to evaluate.
/// @param player Side placing the stone; compared against BLACK_PLAYER.
/// @param scores Output array with at least moves.size() entries; scores[i]
///               receives the score of moves[i].
void batch_process_avx2(
SIMDContext* ctx,
const std::vector<std::pair<int, int>>& moves,
int player,
int* scores)
{
   static_assert(SIMD_WIDTH == 8, "batch_process_avx2 handles eight 32-bit lanes per __m256i");

   if (!ctx || !scores) return;

   // Pick the mover's counter array once instead of duplicating the gather code.
   const bool black_to_move = (player == BLACK_PLAYER);
   int32_t* const counts = black_to_move ? ctx->black_counts : ctx->white_counts;
   const int capacity = static_cast<int>(
      black_to_move ? std::size(ctx->black_counts) : std::size(ctx->white_counts));

   // Debug dump of simd_white: one line per lane, one column per bucket.
   const auto dump_simd_white = [ctx](const char* header) {
      printf("%s", header);
      for (int j = 0; j < 8; ++j) {
         printf("\n");
         for (int k = 0; k < 6; ++k) {
            printf(" :%d", ctx->simd_white[k][j]);
         }
      }
   };

   for (size_t i = 0; i < moves.size(); i += SIMD_WIDTH) {
      dump_simd_white("\n+++++ batch_process_avx2 line 252 simd_white++++++: ");
      const int batch = std::min(SIMD_WIDTH, static_cast<int>(moves.size() - i));

      // Fill the lane indices. Unused and rejected lanes stay at 0 so the gather
      // below only ever reads inside the counter array.
      alignas(32) int indices[SIMD_WIDTH] = {};
      bool in_range[SIMD_WIDTH] = {};
      for (int j = 0; j < batch; ++j) {
         const auto& [x, y] = moves[i + j];
         const int cell = y * BOARD_SIZE + x;
         in_range[j] = (cell >= 0 && cell < capacity);
         if (in_range[j]) {
            indices[j] = cell;
         } else {
            fprintf(stderr,
                    "\nbatch_process_avx2: cell index %d (x=%d, y=%d) is outside the counter array (%d entries); lane %d skipped\n",
                    cell, x, y, capacity, j);
         }
      }

      // Gather the current counters, add one stone to each lane, spill to memory.
      const __m256i v_idx = _mm256_load_si256(reinterpret_cast<const __m256i*>(indices));
      __m256i v_counts = _mm256_i32gather_epi32(counts, v_idx, sizeof(int32_t));
      v_counts = _mm256_add_epi32(v_counts, _mm256_set1_epi32(1));
      alignas(32) int new_counts[SIMD_WIDTH];
      _mm256_store_si256(reinterpret_cast<__m256i*>(new_counts), v_counts);

      // AVX2 has no scatter instruction, so the write-back is scalar.
      for (int j = 0; j < batch; ++j) {
         if (in_range[j]) {
            counts[indices[j]] = new_counts[j];
         }
      }

      // Update the per-lane bucket counters.
      printf("\n|-|-|-|-| batch_process_avx2 line 250 player= %d andBLACK_PLAYER= %d <k:j> ",player,BLACK_PLAYER);
      for (int m = 0; m < batch; ++m) {
         if (!in_range[m]) continue;
         const int k = std::clamp(new_counts[m], 0, 5);
         printf("<%d:%d> | ",k ,m);
         if (black_to_move) {
            ctx->simd_black[k][m]++;
         } else {
            printf("<> | <> | <> | <> | <> | <> | <> | <> | <> | <> | <> | <> | ");
            ctx->simd_white[k][m]++;
         }
      }

      const __m256i v_scores = calculate_scores_avx2(ctx, player);
      dump_simd_white("\n--- batch_process_avx2 line 252 simd_white++++++: ");

      alignas(32) int score_values[SIMD_WIDTH];
      _mm256_store_si256(reinterpret_cast<__m256i*>(score_values), v_scores);
      for (int j = 0; j < batch; ++j) {
         scores[i + j] = score_values[j];
      }
   }
}

两个数据体结构
// Per-thread scratch state used while scoring candidate moves.
// The struct itself is 64-byte aligned; the counter members are 32-byte aligned.
//
// NOTE(review): no array extents are visible on these members in this listing.
// batch_process_avx2 subscripts black_counts / white_counts with
// y * BOARD_SIZE + x, so each needs at least BOARD_SIZE * BOARD_SIZE elements;
// a smaller extent would let its write-back land in the members declared after
// it — confirm against the real declaration.
struct alignas(64) SIMDContext {
// Filled by copy_core_data with a memcpy of sizeof(VectorState)*MAX_VECTORS bytes.
VectorState vec_states;
// Counters read with an AVX2 gather and written back per lane by batch_process_avx2.
alignas(32) int32_t black_counts;
alignas(32) int32_t white_counts;
// Core state (32-byte aligned)
// Used by batch_process_avx2 as a 6-bucket by lane table.
alignas(32) int32_t simd_black;
alignas(32) int32_t simd_white;
};

class CORE_API alignas(SIMD_ALIGNMENT) AlignedSIMDContext {
public:

Vector vectors;
VectorState vec_states;
Move sorted_moves;
int32_t num_sorted_moves = 0;

alignas(32) int32_t black_counts;
alignas(32) int32_t white_counts;

FishC 发表于 2025-3-10 20:58:29

数据混乱的可能原因有以下几个方向，你可以根据这些方向进行排查：

1. 线程安全问题
你的代码可能在多个线程中并行执行，而 `ctx->white_counts` 和 `ctx->black_counts` 可能没有正确同步，导致数据竞争。可以尝试使用 `#pragma omp critical` 或者 `std::atomic<int32_t>` 来确保数据一致性。

2. SIMD 访问越界
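在 `batch_process_avx2` 函数中，你使用 `_mm256_i32gather_epi32` 进行数据加载。gather 指令本身不要求基地址对齐，关键是 `indices` 中的每个下标（`y * BOARD_SIZE + x`）都必须落在 `ctx->white_counts` / `ctx->black_counts` 的数组范围内；否则后面的逐元素写回会覆盖结构体中紧随其后的成员（例如 `simd_black`、`simd_white`）。需要 32 字节对齐的是 `_mm256_load_si256` / `_mm256_store_si256` 所操作的本地数组。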

3. 变量未正确初始化
检查 `ctx->white_counts` 和 `ctx->black_counts` 是否在 `SIMDContext` 结构体初始化时被正确赋值，避免未定义行为。

4. 误修改数据
在 `batch_process_avx2` 里，你使用 `_mm256_store_si256` 存储 `new_counts`，但 `ctx->white_counts` 可能被错误地更新。可以在 `for` 循环中打印 `indices` 和 `new_counts`，确保它们的值正确。

5. 结构体对齐问题
`SIMDContext` 结构体整体声明为 `alignas(64)`，其成员使用 `alignas(32)`；`AlignedSIMDContext` 则按 `SIMD_ALIGNMENT` 对齐，两者的布局可能没有完全匹配，导致 `ctx->white_counts` 访问异常。可以尝试 `static_assert(alignof(SIMDContext) >= 32, "Alignment issue")` 进行检查（由于结构体是 `alignas(64)`，写成 `== 32` 会直接断言失败）。

你可以从这些方向入手，逐步排查问题。

以上回复来自 FishC AI 小助理，如未能正确解答您的问题，请继续追问。

页: [1]

鱼C论坛's Archiver

数据混乱问题