|
5鱼币
数组data最终的数据是哪个()。
#define MAX_DATA_SIZE (10)
/*
 * Quiz program: copies the first MAX_DATA_SIZE / 2 (= 5) bytes of data onto
 * the bytes starting at data[2], so the source range [0, 5) and the
 * destination range [2, 7) overlap.
 * NOTE(review): memcpy is declared in <string.h>; no #include is visible in
 * this snippet.
 */
int main()
{
unsigned int i = 0; /* not used anywhere below */
unsigned char data[MAX_DATA_SIZE] = { '1','2','3','4','5','6','7','8','9','0' };
/* Overlapping memcpy: the C standard leaves the behavior undefined, so the
   final contents depend on the copy order the library happens to use. */
memcpy(&data[2], data, MAX_DATA_SIZE / 2);
return 0;
}
1234567890
1212121890
1212345890
其他结果都不对
KEY:C???
不应该是B、1212121890吗??
拷贝过来data里的数据就应该变了呀??
我们以1212345890为例,具体看一看memcpy内部是怎么做的(在vs2017中)
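通过分析memcpy的源代码,我发现:这里dst(&data[2])大于src(data),而且两块内存重叠,所以vs走的是CopyDown分支,从高地址往低地址复制——先复制后4个字节,然后再复制最前面那1个字节
先把data[1] data[2] data[3] data[4] 复制到data[3] data[4] data[5] data[6] (一次性复制4个字节)
最后把data[0]复制到data[2]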
如果你有能力研究memcpy的源代码,那么请看下面的memcpy源代码;如果没有能力研究,那么目前你只要记住“不同的编译器有不同的方法,甚至同一个编译器的不同编译选项都有不同的结果”就行了,等你有了这个能力以后,再自己去看源代码
page ,132
title memcpy - Copy source memory bytes to destination
;***
;memcpy.asm - contains memcpy and memmove routines
;
; Copyright (c) Microsoft Corporation. All rights reserved.
;
;Purpose:
; memcpy() copies a source memory buffer to a destination buffer.
; Overlapping buffers are not treated specially, so propagation may occur.
; memmove() copies a source memory buffer to a destination buffer.
; Overlapping buffers are treated specially, to avoid propagation.
;
;*******************************************************************************
.xlist
include vcruntime.inc
.list
.xmm
; M_EXIT - common function exit. Expands to a bare `ret` with no operand, so
; the routine itself removes nothing but the return address from the stack.
M_EXIT macro
ret ; _cdecl return
endm ; M_EXIT
; PALIGN_memcpy d - forward copy loop for a source that lies d bytes past a
; 16-byte boundary (expanded in this file with d = 12, 8 and 4).
;
; In:    esi = source, edi = destination, ecx = bytes remaining.
;        Every movdqa below is an aligned access, so both [esi-d] and [edi]
;        have to be 16-byte aligned.
; Out:   esi/edi advanced past the copied bytes, ecx = bytes still to copy
;        (fewer than 30h once the loop exits).
; Clobb: xmm0-xmm5, flags.
;
; Each iteration loads three aligned 16-byte source blocks, uses palignr to
; splice every block with its predecessor shifted by d bytes, and stores the
; three results (48 bytes) to the destination.
; NOTE(review): the first iteration runs before any length test, so callers
; presumably guarantee ecx >= 30h on entry - confirm at the expansion sites.
PALIGN_memcpy macro d
MovPalign&d&:
movdqa xmm1,xmmword ptr [esi-d] ; xmm1 = aligned block containing the first source bytes
lea esi, byte ptr [esi-d] ; esi = source rounded down to that block
align @WordSize
PalignLoop&d&:
movdqa xmm3,xmmword ptr [esi+10h] ; next aligned source block
sub ecx,30h ; 48 bytes are consumed per iteration
movdqa xmm0,xmmword ptr [esi+20h] ; second next block
movdqa xmm5,xmmword ptr [esi+30h] ; third next block
lea esi, xmmword ptr [esi+30h] ; advance the source by 48 bytes
cmp ecx,30h ; sets flags for the jae at the bottom; the SSE moves and lea in between do not modify flags
movdqa xmm2,xmm3 ; unshifted copy of block 1, needed by the second palignr
palignr xmm3,xmm1,d ; xmm3 = bytes d..15 of xmm1 followed by bytes 0..d-1 of xmm3
movdqa xmmword ptr [edi],xmm3
movdqa xmm4,xmm0 ; unshifted copy of block 2, needed by the third palignr
palignr xmm0,xmm2,d
movdqa xmmword ptr [edi+10h],xmm0
movdqa xmm1,xmm5 ; block 3 becomes the carried-over block of the next iteration
palignr xmm5,xmm4,d
movdqa xmmword ptr [edi+20h],xmm5
lea edi, xmmword ptr [edi+30h] ; advance the destination by 48 bytes
jae PalignLoop&d& ; loop while at least 48 bytes remain (flags from the cmp above)
lea esi, xmmword ptr [esi+d] ; undo the rounding: esi = next unread source byte
endm ; PALIGN_memcpy
CODESEG
extrn __isa_available:dword
extrn __isa_enabled:dword
extrn __favor:dword
page
;***
;memcpy - Copy source buffer to destination buffer
;
;Purpose:
; memcpy() copies a source memory buffer to a destination memory buffer.
; This routine does NOT recognize overlapping buffers, and thus can lead
; to propagation.
; For cases where propagation must be avoided, memmove() must be used.
;
; Algorithm:
;
; Same as memmove. See Below
;
;
;memmove - Copy source buffer to destination buffer
;
;Purpose:
; memmove() copies a source memory buffer to a destination memory buffer.
; This routine recognizes overlapping buffers to avoid propagation.
; For cases where propagation is not a problem, memcpy() can be used.
;
; Algorithm:
;
; void * memmove(void * dst, void * src, size_t count)
; {
; void * ret = dst;
;
; if (dst <= src || dst >= (src + count)) {
; /*
; * Non-Overlapping Buffers
; * copy from lower addresses to higher addresses
; */
; while (count--)
; *dst++ = *src++;
; }
; else {
; /*
; * Overlapping Buffers
; * copy from higher addresses to lower addresses
; */
; dst += count - 1;
; src += count - 1;
;
; while (count--)
; *dst-- = *src--;
; }
;
; return(ret);
; }
;
;
;Entry:
; void *dst = pointer to destination buffer
; const void *src = pointer to source buffer
; size_t count = number of bytes to copy
;
;Exit:
; Returns a pointer to the destination buffer in EAX
;
;Uses:
; ECX, EDX (EAX carries the return value); the SSE paths also overwrite XMM0-XMM7
;
;Exceptions:
;*******************************************************************************
; The routine below is assembled under one of two public names: memmove when
; MEM_MOVE is defined, memcpy otherwise. No other MEM_MOVE conditional is
; visible in the body, so both names get the same instructions, including the
; overlap test.
ifdef MEM_MOVE
_MEM_ equ <memmove>
else ; MEM_MOVE
_MEM_ equ <memcpy>
endif ; MEM_MOVE
% public _MEM_
_MEM_ proc \
dst:ptr byte, \
src:ptr byte, \
count:IWORD
; dst   = destination pointer
; src   = source pointer
; count = number of bytes to copy
; No assembler-generated prologue/epilogue: the arguments are read directly
; relative to esp below.
OPTION PROLOGUE:NONE, EPILOGUE:NONE
push edi ; save edi
push esi ; save esi
; size param/4 prolog byte #reg saved
.FPO ( 0, 3 , $-_MEM_ , 2, 0, 0 )
; After the two pushes: [esp+0Ch] = dst, [esp+10h] = src, [esp+14h] = count
mov esi,[esp + 010h] ; esi = source
mov ecx,[esp + 014h] ; ecx = number of bytes to move
mov edi,[esp + 0Ch] ; edi = dest
;
; Check for overlapping buffers:
; If (dst <= src) Or (dst >= src + Count) Then
; Do normal (Upwards) Copy
; Else
; Do Downwards Copy to avoid propagation
;
mov eax,ecx ; eax = byte count
mov edx,ecx ; edx = byte count
add eax,esi ; eax = point past source end
cmp edi,esi ; dst <= src ?
jbe short CopyUp ; yes: copying toward higher addresses is safe
cmp edi,eax ; dst < (src + count) ?
jb CopyDown ; yes: dst starts inside the source, copy toward lower addresses
; otherwise dst is at or beyond the source end: fall through to CopyUp
;
; Forward copy. Reached when dst <= src or when dst is at or beyond
; src + count, so copying toward higher addresses never overwrites source
; bytes that have not been read yet.
; Dispatch on the byte count in ecx:
;   ecx < 32         -> CopyUpDwordMov
;   32 <= ecx < 128  -> XmmCopySmallTest if the SSE2 bit is set, else Dword_align
;   ecx >= 128       -> CopyUpLargeMov
;
CopyUp:
cmp ecx, 020h
jb CopyUpDwordMov ; size smaller than 32 bytes, use dwords
cmp ecx, 080h
jae CopyUpLargeMov ; 128 bytes or more: try the large-copy strategies
bt __isa_enabled, __ISA_AVAILABLE_SSE2 ; CF = selected bit of __isa_enabled
jc XmmCopySmallTest ; SSE2 bit set: copy 32-byte chunks with XMM registers
jmp Dword_align ; otherwise align dst and use rep movsd
CopyUpLargeMov:
bt __favor, __FAVOR_ENFSTRG ; check if Enhanced Fast Strings is supported
jnc CopyUpSSE2Check ; if not, check for SSE2 support
rep movsb ; byte-string copy of ecx bytes from [esi] to [edi]; assumes DF is clear so it runs upward
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi
pop edi
M_EXIT
;
; Check if source and destination are equally aligned, i.e. whether they have
; the same offset within a 16-byte block.
;
CopyUpSSE2Check:
mov eax,edi
xor eax,esi
test eax,15 ; non-zero when the low 4 address bits of dst and src differ
jne AtomChk ; Not aligned go check Atom
bt __isa_enabled, __ISA_AVAILABLE_SSE2
jc XmmCopy ; yes, go SSE2 copy (params already set)
; NOTE(review): with equal offsets but the SSE2 bit clear, control falls
; through into AtomChk. If the PALIGNR path were then taken, src would become
; 16-byte aligned together with dst and MovPalign8 would execute
; movdqa [esi-8] on a misaligned address. Presumably __FAVOR_ATOM is never
; set while the SSE2 bit is clear - confirm.
AtomChk:
; Is Atom supported?
bt __favor, __FAVOR_ATOM
jnc Dword_align ; no,jump
; check if dst is 4 byte aligned
test edi, 3
jne Dword_align ; dst not dword aligned: the generic path aligns it first
; check if src is 4 byte aligned
test esi, 3
jne Dword_align_Ok ; src not dword aligned (dst is): go straight to the rep movsd copy
; dst and src are both dword aligned: fall through to the PALIGNR path
; A software pipelining vectorized memcpy loop using PALIGN instructions
; Entered by fall-through with dst and src both dword aligned.
; (1) copy the first bytes to align dst up to the nearest 16-byte boundary
; 4 byte align -> 12 byte copy, 8 byte align -> 8 byte copy, 12 byte align -> 4 byte copy
PalignHead4:
bt edi, 2 ; CF = bit 2 of dst
jae PalignHead8 ; bit clear: no 4-byte step needed
mov eax, dword ptr [esi]
sub ecx, 4
lea esi, byte ptr [esi+4]
mov dword ptr [edi], eax
lea edi, byte ptr [edi+4] ; dst is now 8-byte aligned
PalignHead8:
bt edi, 3 ; CF = bit 3 of dst
jae PalignLoop ; bit clear: dst is already 16-byte aligned
movq xmm1, qword ptr [esi]
sub ecx, 8
lea esi, byte ptr [esi+8]
movq qword ptr [edi], xmm1
lea edi, byte ptr [edi+8] ; dst is now 16-byte aligned
;(2) Use SSE palign loop
; Select the macro expansion that matches the offset of src within its
; 16-byte block (expected to be 4, 8 or 12 at this point).
PalignLoop:
test esi, 7
je MovPalign8 ; src is a multiple of 8: use the d = 8 loop
bt esi, 3
jae MovPalign4 ; bit 3 clear: offset 4, use the d = 4 loop
PALIGN_memcpy 12 ; remaining case: offset 12
jmp PalignTail
PALIGN_memcpy 8
jmp PalignTail
PALIGN_memcpy 4 ; falls through into PalignTail
;(3) Copy the trailing bytes.
PalignTail:
cmp ecx,10h
jb PalignTail4 ; fewer than 16 bytes left
movdqu xmm1,xmmword ptr [esi] ; unaligned load, aligned store
sub ecx, 10h
lea esi, xmmword ptr [esi+10h]
movdqa xmmword ptr [edi],xmm1
lea edi, xmmword ptr [edi+10h]
jmp PalignTail
PalignTail4:
bt ecx, 2 ; CF = bit 2 of the remaining count
jae PalignTail8 ; clear: no 4-byte piece
mov eax, dword ptr [esi]
sub ecx,4
lea esi, byte ptr [esi+4]
mov dword ptr [edi], eax
lea edi, byte ptr [edi+4]
PalignTail8:
bt ecx, 3 ; CF = bit 3 of the remaining count
jae PalignTailLE3 ; clear: no 8-byte piece
movq xmm1, qword ptr [esi]
sub ecx,8
lea esi, byte ptr [esi+8]
movq qword ptr [edi], xmm1
lea edi, byte ptr [edi+8]
PalignTailLE3:
; ecx is now 0..3: dispatch to the matching trailing-byte handler
mov eax, dword ptr TrailingUpVec[ecx*4]
jmp eax
; The algorithm for forward moves is to align the destination to a dword
; boundary and so we can move dwords with an aligned destination. This
; occurs in 3 steps.
;
; - move x = ((4 - Dest & 3) & 3) bytes
; - move y = ((L-x) >> 2) dwords
; - move (L - x - y*4) bytes
;
; The byte loop below does not test ecx; every jump to Dword_align visible in
; this file arrives with ecx >= 32, and the loop copies at most 3 bytes.
;
Dword_align:
test edi,11b ; check if destination is dword aligned
jz short Dword_align_Ok ; already dword aligned: skip the byte loop
Dword_up_align_loop:
mov al, byte ptr [esi]
mov byte ptr [edi], al
dec ecx
add esi, 1
add edi, 1
test edi, 11b ; repeat until dst reaches a dword boundary
jnz Dword_up_align_loop
Dword_align_Ok:
mov edx, ecx ; keep the byte count; its low 2 bits give the trailing bytes
cmp ecx, 32
jb CopyUpDwordMov ; fewer than 32 bytes left: use the simple dword/byte loops
shr ecx,2 ; ecx = number of whole dwords
rep movsd ; move all of our dwords (upward, assuming DF is clear)
and edx,11b ; trailing byte count
jmp dword ptr TrailingUpVec[edx*4] ; process trailing bytes
;
; Code to do optimal memory copies for non-dword-aligned destinations.
;
; The following length check is done for two reasons:
;
; 1. to ensure that the actual move length is greater than any possible
; alignment move, and
;
; 2. to skip the multiple move logic for small moves where it would
; be faster to move the bytes with one instruction.
;
; NOTE(review): no length check actually follows this comment, and nothing in
; the visible file jumps to ByteCopyUp. Its index [ecx*4+16] starts 16 bytes
; into TrailingUpVec, i.e. just past the table's four dword entries. This
; looks like a leftover from an earlier layout - confirm before relying on it.
;
align @WordSize
ByteCopyUp:
jmp dword ptr TrailingUpVec[ecx*4+16] ; process just bytes
;-----------------------------------------------------------------------------
; Jump table for the forward copies, indexed by the number of bytes (0..3)
; still to be copied at [esi] -> [edi]. Every handler loads the original dst
; from the stack into eax, restores esi/edi and returns.
align @WordSize
TrailingUpVec dd TrailingUp0, TrailingUp1, TrailingUp2, TrailingUp3
align @WordSize
TrailingUp0:
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi ; restore esi
pop edi ; restore edi
; spare
M_EXIT
align @WordSize
TrailingUp1:
mov al,[esi] ; get byte from source
; spare
mov [edi],al ; put byte in destination
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi ; restore esi
pop edi ; restore edi
M_EXIT
align @WordSize
TrailingUp2:
mov al,[esi] ; get first byte from source
; spare
mov [edi],al ; put first byte into destination
mov al,[esi+1] ; get second byte from source
mov [edi+1],al ; put second byte into destination
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi ; restore esi
pop edi ; restore edi
M_EXIT
align @WordSize
TrailingUp3:
mov al,[esi] ; get first byte from source
; spare
mov [edi],al ; put first byte into destination
mov al,[esi+1] ; get second byte from source
mov [edi+1],al ; put second byte into destination
mov al,[esi+2] ; get third byte from source
mov [edi+2],al ; put third byte into destination
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi ; restore esi
pop edi ; restore edi
M_EXIT
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; Copy down to avoid propagation in overlapping buffers.
; Reached when src < dst < src + count. esi and edi are first moved to one
; past the end of their buffers and the copy then proceeds toward lower
; addresses.
align @WordSize
CopyDown:
; Size check: copies shorter than 32 bytes go straight to the dword/byte
; loops at CopyDownSmall without any alignment step.
lea esi, [esi+ecx] ; esi, edi pointing to the end of the buffer
lea edi, [edi+ecx]
cmp ecx, 32
jb CopyDownSmall
bt __isa_enabled, __ISA_AVAILABLE_SSE2
jc XmmMovLargeAlignTest ; SSE2 bit set: use the XMM downward copy
; See if the end of the destination is dword aligned
test edi,11b ; Test if dword aligned
jz CopyDownAligned ; already aligned: skip the byte loop
CopyDownNotAligned:
mov edx,edi ; get destination offset
and edx, 11b ; edx = bytes above the last dword boundary (1..3 here)
sub ecx, edx ; those bytes are copied by the loop below
CopyDownAlignLoop:
mov al, byte ptr [esi-1]
mov byte ptr[edi-1], al
dec esi
dec edi
sub edx, 1
jnz CopyDownAlignLoop
CopyDownAligned:
cmp ecx,32 ; test if small enough for unwind copy
jb CopyDownSmall ; if so, then jump
mov edx, ecx
shr ecx,2 ; shift down to dword count
and edx,11b ; trailing byte count
sub esi, 4 ; point esi/edi at the last dword of each buffer
sub edi, 4 ; setting up src, dest registers
std ; set direction flag
rep movsd ; move all of dwords at once
cld ; clear direction flag back
; esi/edi now sit 4 bytes below the lowest dword copied, so the remaining
; bytes are at offsets +3, +2, +1 (highest address first).
jmp dword ptr TrailingDownVec[edx*4]; process trailing bytes
;-----------------------------------------------------------------------------
; Jump table for the downward rep movsd copy, indexed by the number of bytes
; (0..3) still to be copied. The handlers read [esi+3], [esi+2], [esi+1] in
; that order, i.e. from the highest remaining address downward, then load the
; original dst from the stack into eax, restore esi/edi and return.
align @WordSize
TrailingDownVec dd TrailingDown0, TrailingDown1, TrailingDown2, TrailingDown3
align @WordSize
TrailingDown0:
mov eax,[esp + 0Ch] ; return original destination pointer
; spare
pop esi ; restore esi
pop edi ; restore edi
M_EXIT
align @WordSize
TrailingDown1:
mov al,[esi+3] ; get byte from source
; spare
mov [edi+3],al ; put byte in destination
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi ; restore esi
pop edi ; restore edi
M_EXIT
align @WordSize
TrailingDown2:
mov al,[esi+3] ; get first byte from source
; spare
mov [edi+3],al ; put first byte into destination
mov al,[esi+2] ; get second byte from source
mov [edi+2],al ; put second byte into destination
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi ; restore esi
pop edi ; restore edi
M_EXIT
align @WordSize
TrailingDown3:
mov al,[esi+3] ; get first byte from source
; spare
mov [edi+3],al ; put first byte into destination
mov al,[esi+2] ; get second byte from source
mov [edi+2],al ; put second byte into destination
mov al,[esi+1] ; get third byte from source
mov [edi+1],al ; put third byte into destination
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi ; restore esi
pop edi ; restore edi
M_EXIT
; Copy overlapping buffers using XMM registers
; Downward copy: esi/edi point one past the end of the bytes still to copy
; and ecx holds the remaining count. Each chunk is loaded completely before
; any of it is stored.
XmmMovLargeAlignTest:
test edi, 0Fh ; check if it's 16-byte aligned
jz XmmMovLargeLoop
XmmMovAlignLoop:
; copy single bytes downward until the destination end is 16-byte aligned
dec ecx
dec esi
dec edi
mov al, [esi]
mov [edi], al
test edi, 0Fh
jnz XmmMovAlignLoop
XmmMovLargeLoop:
cmp ecx, 128
jb XmmMovSmallTest ; fewer than 128 bytes left
sub esi, 128 ; step both pointers down to the start of the next 128-byte chunk
sub edi, 128
movdqu xmm0, xmmword ptr[esi]
movdqu xmm1, xmmword ptr[esi+16]
movdqu xmm2, xmmword ptr[esi+32]
movdqu xmm3, xmmword ptr[esi+48]
movdqu xmm4, xmmword ptr[esi+64]
movdqu xmm5, xmmword ptr[esi+80]
movdqu xmm6, xmmword ptr[esi+96]
movdqu xmm7, xmmword ptr[esi+112]
movdqu xmmword ptr[edi], xmm0
movdqu xmmword ptr[edi+16], xmm1
movdqu xmmword ptr[edi+32], xmm2
movdqu xmmword ptr[edi+48], xmm3
movdqu xmmword ptr[edi+64], xmm4
movdqu xmmword ptr[edi+80], xmm5
movdqu xmmword ptr[edi+96], xmm6
movdqu xmmword ptr[edi+112], xmm7
sub ecx, 128
test ecx, 0FFFFFF80h ; any bit from 7 upward set => at least 128 bytes remain
jnz XmmMovLargeLoop
XmmMovSmallTest:
cmp ecx, 32 ; if lesser than 32, use dwords
jb CopyDownSmall
XmmMovSmallLoop:
sub esi, 32 ; step down to the start of the next 32-byte chunk
sub edi, 32
movdqu xmm0, xmmword ptr[esi]
movdqu xmm1, xmmword ptr[esi+16]
movdqu xmmword ptr[edi], xmm0
movdqu xmmword ptr[edi+16], xmm1
sub ecx, 32
test ecx, 0FFFFFFE0h ; any bit from 5 upward set => at least 32 bytes remain
jnz XmmMovSmallLoop
; Downward tail: whole dwords first, then single bytes.
CopyDownSmall:
test ecx, 0FFFFFFFCh ; mask the bytes
jz CopyDownByteTest ; fewer than 4 bytes left
CopyDownDwordLoop:
sub edi, 4
sub esi, 4
mov eax, [esi] ; the dword is read in full before it is written
mov [edi], eax
sub ecx, 4
test ecx, 0FFFFFFFCh ; loop while at least 4 bytes remain
jnz CopyDownDwordLoop
CopyDownByteTest:
test ecx, ecx
jz CopyDownReturn ; nothing left
CopyDownByteLoop:
sub edi, 1
sub esi, 1
mov al, [esi]
mov [edi], al
sub ecx, 1
jnz CopyDownByteLoop
CopyDownReturn:
mov eax,[esp + 0Ch] ; return original destination pointer
; spare
pop esi ; restore esi
pop edi ; restore edi
M_EXIT
; Using XMM registers for non-overlapping buffers
; XmmCopy is entered from CopyUpSSE2Check, where src and dst were found to
; have the same offset within a 16-byte block.
align 16
XmmCopy:
mov eax, esi
and eax, 0Fh
; eax = src and dst alignment (src mod 16)
test eax, eax
jne XmmCopyUnaligned ; non-zero offset: copy the head bytes first
; in:
; edi = dst (16 byte aligned)
; esi = src (16 byte aligned)
; ecx = len is >= (128 - head alignment bytes)
; do block copy using SSE2 stores
XmmCopyAligned:
mov edx, ecx
and ecx, 7Fh ; ecx = bytes left over after the 128-byte blocks
shr edx, 7 ; edx = number of whole 128-byte blocks
je XmmCopySmallTest ; no whole block (ZF comes from the shr)
; edx = loop count (128-byte blocks)
; ecx = remaining copy length (< 128)
; Copy greater than or equal to 128 bytes using XMM registers
align 16
XmmCopyLargeLoop:
movdqa xmm0,xmmword ptr [esi]
movdqa xmm1,xmmword ptr [esi + 10h]
movdqa xmm2,xmmword ptr [esi + 20h]
movdqa xmm3,xmmword ptr [esi + 30h]
movdqa xmmword ptr [edi],xmm0
movdqa xmmword ptr [edi + 10h],xmm1
movdqa xmmword ptr [edi + 20h],xmm2
movdqa xmmword ptr [edi + 30h],xmm3
movdqa xmm4,xmmword ptr [esi + 40h]
movdqa xmm5,xmmword ptr [esi + 50h]
movdqa xmm6,xmmword ptr [esi + 60h]
movdqa xmm7,xmmword ptr [esi + 70h]
movdqa xmmword ptr [edi + 40h],xmm4
movdqa xmmword ptr [edi + 50h],xmm5
movdqa xmmword ptr [edi + 60h],xmm6
movdqa xmmword ptr [edi + 70h],xmm7
lea esi,[esi + 80h]
lea edi,[edi + 80h]
dec edx
jne XmmCopyLargeLoop
; Copy lesser than 128 bytes
; (also the entry used by CopyUp for 32..127-byte copies when the SSE2 bit
; is set; the loads and stores below are unaligned-safe movdqu)
XmmCopySmallTest:
test ecx, ecx
je CopyUpReturn ; nothing left
; ecx = length (< 128 bytes)
mov edx, ecx
shr edx, 5 ; check if there are 32 bytes that can be set
test edx, edx
je CopyUpDwordMov ; fewer than 32 bytes: dword/byte loops only
; at least 32 bytes: loop copying 32 bytes per iteration
; edx - loop count
; edi = dst
; esi = src
align 16
XmmCopySmallLoop:
movdqu xmm0, xmmword ptr [esi]
movdqu xmm1, xmmword ptr [esi + 10h]
movdqu xmmword ptr [edi], xmm0
movdqu xmmword ptr [edi + 10h], xmm1
lea esi, [esi + 20h]
lea edi, [edi + 20h]
dec edx
jne XmmCopySmallLoop
CopyUpDwordMov:
; last 0-31 bytes: copy whole dwords first, then the remaining 0-3 bytes
; esi = src
; edi = dst
; ecx = remaining len (only its low 5 bits are used)
; eax and edx are scratch from here on
and ecx, 1Fh
je CopyUpReturn
CopyUpDwordTest:
mov eax, ecx ; save remaining len and calc number of dwords
shr ecx, 2
je CopyUpByteTest ; if none try bytes
CopyUpDwordLoop:
mov edx, dword ptr [esi]
mov dword ptr [edi], edx
add edi, 4
add esi, 4
sub ecx, 1
jne CopyUpDwordLoop
CopyUpByteTest:
mov ecx, eax
and ecx, 03h ; ecx = trailing byte count
je CopyUpReturn ; if none return
CopyUpByteLoop:
mov al, byte ptr [esi]
mov byte ptr [edi], al
inc esi
inc edi
dec ecx
jne CopyUpByteLoop
; the byte loop falls through the alignment padding into CopyUpReturn
align 16
CopyUpReturn:
; return dst
mov eax,[esp + 0Ch] ; return original destination pointer
pop esi
pop edi
M_EXIT
; src and dst share the same non-zero offset (eax) within a 16-byte block
align 16
XmmCopyUnaligned:
; copy the first 1-15 bytes to align both src and dst up to the nearest 16-byte boundary:
; in
; esi = src
; edi = dst
; eax = src and dst alignment
; ecx = length
mov edx, 010h
sub edx, eax ; calculate number of bytes to get it aligned
sub ecx, edx ; calc new length and save it
push ecx ; esp moves by 4 until the matching pop at XmmAlignAdjustCnt
mov eax, edx ; save alignment byte count for dwords
mov ecx, eax ; set ecx to rep count
and ecx, 03h ; ecx = head bytes needed to reach a dword boundary
je XmmAlignDwordTest ; if no bytes go do dwords
XmmAlignByte:
mov dl, byte ptr [esi] ; move the bytes
mov byte ptr [edi], dl
inc esi ; increment the addresses
inc edi
dec ecx ; decrement the counter
jne XmmAlignByte
XmmAlignDwordTest:
shr eax, 2 ; get dword count
je XmmAlignAdjustCnt ; if none go to main loop
XmmAlignDwordLoop:
mov edx, dword ptr [esi] ; move the dwords
mov dword ptr [edi], edx
lea esi, [esi+4] ; increment the addresses
lea edi, [edi+4]
dec eax ; decrement the counter
jne XmmAlignDwordLoop
XmmAlignAdjustCnt:
pop ecx ; retrieve the adjusted length
jmp XmmCopyAligned ; src and dst are now both 16-byte aligned
_MEM_ endp
end
|
最佳答案
查看完整内容
我们以1212345890为例,具体看一看memcpy内部是怎么做的(在vs2017中)
通过分析memcpy的源代码,我发现vs是先复制后4个字节,然后再复制最前面那1个字节
先把[1] [2] [3] [4] 复制到[3] [4] [5] [6] (一次性复制4个字节)
最后把[0]复制到[2]
如果你有能力研究memcpy的源代码,那么请看下面的memcpy源代码,如果没有能力研究,那么目前你只是记住“不同的编译器有不同的方法,甚至同一个编译器的不同编译选项都有不同 ...
|