123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
// galMulSSSE3Xor pushes every byte of in through two 16-entry nibble lookup
// tables and XORs the combined result into out, 16 bytes per iteration.
//
// Presumed Go prototype (the FP offsets 0/24/48/72 match four slice headers;
// confirm against the Go declaration):
//
//	func galMulSSSE3Xor(low, high, in, out []byte)
//
// Per byte b of in:  out[i] ^= low[b & 0x0f] ^ high[b >> 4]
//
// NOTE(review): the name suggests the tables encode multiplication by a
// constant in a Galois field; that is decided by the caller, not visible here.
//
// Only len(in)/16 whole blocks are handled; a tail of len(in)%16 bytes is left
// untouched. The lengths of low, high and out are never read: 16 bytes are
// loaded from low and from high, and out is accessed for as many blocks as in.
//
// Register roles inside the loop:
//	SI = read cursor in in      DX = read/write cursor in out
//	R9 = 16-byte blocks left    X8 = 0x0f in every byte lane
//	X6 = low table              X7 = high table
TEXT ·galMulSSSE3Xor(SB), 7, $0
	// Build the nibble mask: put 15 in byte 0, then broadcast byte 0 to all
	// sixteen lanes with an all-zero shuffle control.
	MOVQ   $15, BX
	MOVQ   BX, X8
	PXOR   X5, X5
	PSHUFB X5, X8

	// Load the two 16-byte lookup tables.
	MOVQ  low+0(FP), SI
	MOVOU (SI), X6
	MOVQ  high+24(FP), DX
	MOVOU (DX), X7

	// Data pointers and block count.
	MOVQ  in+48(FP), SI
	MOVQ  out+72(FP), DX
	MOVQ  in_len+56(FP), R9
	SHRQ  $4, R9             // R9 = len(in) / 16
	TESTQ R9, R9
	JZ    xor_ssse3_done     // fewer than 16 input bytes: nothing to do

xor_ssse3_loop:
	// Split the 16 input bytes into low and high nibbles.
	MOVOU (SI), X0
	MOVOU X0, X1
	PSRLQ $4, X1             // shifts 64-bit lanes; stray bits are masked off below
	PAND  X8, X0             // X0 = low nibbles
	PAND  X8, X1             // X1 = high nibbles

	// Table lookups. PSHUFB overwrites its destination, so work on copies.
	MOVOU  X6, X2
	PSHUFB X0, X2            // X2 = low[low nibble]
	MOVOU  X7, X3
	PSHUFB X1, X3            // X3 = high[high nibble]
	PXOR   X2, X3            // X3 = combined lookup

	// Fold into the bytes already present in out.
	MOVOU (DX), X4
	PXOR  X4, X3
	MOVOU X3, (DX)

	ADDQ $16, SI
	ADDQ $16, DX
	DECQ R9
	JNZ  xor_ssse3_loop

xor_ssse3_done:
	RET
// galMulSSSE3 pushes every byte of in through two 16-entry nibble lookup
// tables and stores the combined result to out, 16 bytes per iteration.
// Previous contents of out are overwritten, not XORed.
//
// Presumed Go prototype (the FP offsets 0/24/48/72 match four slice headers;
// confirm against the Go declaration):
//
//	func galMulSSSE3(low, high, in, out []byte)
//
// Per byte b of in:  out[i] = low[b & 0x0f] ^ high[b >> 4]
//
// NOTE(review): the name suggests the tables encode multiplication by a
// constant in a Galois field; that is decided by the caller, not visible here.
//
// Only len(in)/16 whole blocks are handled; a tail of len(in)%16 bytes is left
// untouched. The lengths of low, high and out are never read: 16 bytes are
// loaded from low and from high, and out is written for as many blocks as in.
//
// Register roles inside the loop:
//	SI = read cursor in in      DX = write cursor in out
//	R9 = 16-byte blocks left    X8 = 0x0f in every byte lane
//	X6 = low table              X7 = high table
TEXT ·galMulSSSE3(SB), 7, $0
	// Build the nibble mask: put 15 in byte 0, then broadcast byte 0 to all
	// sixteen lanes with an all-zero shuffle control.
	MOVQ   $15, BX
	MOVQ   BX, X8
	PXOR   X5, X5
	PSHUFB X5, X8

	// Load the two 16-byte lookup tables.
	MOVQ  low+0(FP), SI
	MOVOU (SI), X6
	MOVQ  high+24(FP), DX
	MOVOU (DX), X7

	// Data pointers and block count.
	MOVQ  in+48(FP), SI
	MOVQ  out+72(FP), DX
	MOVQ  in_len+56(FP), R9
	SHRQ  $4, R9             // R9 = len(in) / 16
	TESTQ R9, R9
	JZ    ssse3_done         // fewer than 16 input bytes: nothing to do

ssse3_loop:
	// Split the 16 input bytes into low and high nibbles.
	MOVOU (SI), X0
	MOVOU X0, X1
	PSRLQ $4, X1             // shifts 64-bit lanes; stray bits are masked off below
	PAND  X8, X0             // X0 = low nibbles
	PAND  X8, X1             // X1 = high nibbles

	// Table lookups. PSHUFB overwrites its destination, so work on copies.
	MOVOU  X6, X2
	PSHUFB X0, X2            // X2 = low[low nibble]
	MOVOU  X7, X3
	PSHUFB X1, X3            // X3 = high[high nibble]
	PXOR   X2, X3            // X3 = combined lookup

	MOVOU X3, (DX)

	ADDQ $16, SI
	ADDQ $16, DX
	DECQ R9
	JNZ  ssse3_loop

ssse3_done:
	RET
// galMulAVX2Xor pushes every byte of in through two 16-entry nibble lookup
// tables and XORs the combined result into out, 32 bytes per iteration.
//
// Presumed Go prototype (the FP offsets 0/24/48/72 match four slice headers;
// confirm against the Go declaration):
//
//	func galMulAVX2Xor(low, high, in, out []byte)
//
// Per byte b of in:  out[i] ^= low[b & 0x0f] ^ high[b >> 4]
//
// NOTE(review): the name suggests the tables encode multiplication by a
// constant in a Galois field; that is decided by the caller, not visible here.
//
// Only len(in)/32 whole blocks are handled; a tail of len(in)%32 bytes is left
// untouched. The lengths of low, high and out are never read.
//
// The AVX2 instructions are emitted as raw opcode bytes; the decoded
// instruction (Intel operand order) is given beside each encoding.
//
// Register roles inside the loop:
//	SI = read cursor in in      DX = read/write cursor in out
//	R9 = 32-byte blocks left    ymm8 = 0x0f in every byte lane
//	ymm6 = low table in both 128-bit halves
//	ymm7 = high table in both 128-bit halves
TEXT ·galMulAVX2Xor(SB), 7, $0
	// Nibble mask: 15 in byte 0 of xmm5, broadcast to all 32 lanes of ymm8.
	MOVQ $15, BX
	MOVQ BX, X5
	LONG $0x787d62c4; BYTE $0xc5   // VPBROADCASTB ymm8, xmm5

	// Load each 16-byte table and mirror it into the upper 128-bit half,
	// because VPSHUFB shuffles within each 128-bit half independently.
	MOVQ  low+0(FP), SI
	MOVOU (SI), X6
	LONG  $0x384de3c4; WORD $0x01f6 // VINSERTI128 ymm6, ymm6, xmm6, 1
	MOVQ  high+24(FP), DX
	MOVOU (DX), X7
	LONG  $0x3845e3c4; WORD $0x01ff // VINSERTI128 ymm7, ymm7, xmm7, 1

	// Data pointers and block count.
	MOVQ  in+48(FP), SI
	MOVQ  out+72(FP), DX
	MOVQ  in_len+56(FP), R9
	SHRQ  $5, R9                   // R9 = len(in) / 32
	TESTQ R9, R9
	JZ    xor_avx2_done            // fewer than 32 input bytes: nothing to do

xor_avx2_loop:
	// Split the 32 input bytes into low and high nibbles.
	LONG $0x066ffec5               // VMOVDQU ymm0, [rsi]
	LONG $0xd073f5c5; BYTE $0x04   // VPSRLQ  ymm1, ymm0, 4
	LONG $0xdb7dc1c4; BYTE $0xc0   // VPAND   ymm0, ymm0, ymm8   ; low nibbles
	LONG $0xdb75c1c4; BYTE $0xc8   // VPAND   ymm1, ymm1, ymm8   ; high nibbles

	// Table lookups and combination.
	LONG $0x004de2c4; BYTE $0xd0   // VPSHUFB ymm2, ymm6, ymm0   ; low[low nibble]
	LONG $0x0045e2c4; BYTE $0xd9   // VPSHUFB ymm3, ymm7, ymm1   ; high[high nibble]
	LONG $0xdbefedc5               // VPXOR   ymm3, ymm2, ymm3   ; combined lookup

	// Fold into the bytes already present in out.
	LONG $0x226ffec5               // VMOVDQU ymm4, [rdx]
	LONG $0xe4efe5c5               // VPXOR   ymm4, ymm3, ymm4
	LONG $0x227ffec5               // VMOVDQU [rdx], ymm4

	ADDQ $32, SI
	ADDQ $32, DX
	DECQ R9
	JNZ  xor_avx2_loop

xor_avx2_done:
	// VZEROUPPER: runs on the early-exit path too, since the table setup
	// above has already written the upper halves of ymm6/ymm7/ymm8.
	BYTE $0xc5; BYTE $0xf8; BYTE $0x77
	RET
// galMulAVX2 pushes every byte of in through two 16-entry nibble lookup
// tables and stores the combined result to out, 32 bytes per iteration.
// Previous contents of out are overwritten, not XORed.
//
// Presumed Go prototype (the FP offsets 0/24/48/72 match four slice headers;
// confirm against the Go declaration):
//
//	func galMulAVX2(low, high, in, out []byte)
//
// Per byte b of in:  out[i] = low[b & 0x0f] ^ high[b >> 4]
//
// NOTE(review): the name suggests the tables encode multiplication by a
// constant in a Galois field; that is decided by the caller, not visible here.
//
// Only len(in)/32 whole blocks are handled; a tail of len(in)%32 bytes is left
// untouched. The lengths of low, high and out are never read.
//
// The AVX2 instructions are emitted as raw opcode bytes; the decoded
// instruction (Intel operand order) is given beside each encoding.
//
// Register roles inside the loop:
//	SI = read cursor in in      DX = write cursor in out
//	R9 = 32-byte blocks left    ymm8 = 0x0f in every byte lane
//	ymm6 = low table in both 128-bit halves
//	ymm7 = high table in both 128-bit halves
TEXT ·galMulAVX2(SB), 7, $0
	// Nibble mask: 15 in byte 0 of xmm5, broadcast to all 32 lanes of ymm8.
	MOVQ $15, BX
	MOVQ BX, X5
	LONG $0x787d62c4; BYTE $0xc5   // VPBROADCASTB ymm8, xmm5

	// Load each 16-byte table and mirror it into the upper 128-bit half,
	// because VPSHUFB shuffles within each 128-bit half independently.
	MOVQ  low+0(FP), SI
	MOVOU (SI), X6
	LONG  $0x384de3c4; WORD $0x01f6 // VINSERTI128 ymm6, ymm6, xmm6, 1
	MOVQ  high+24(FP), DX
	MOVOU (DX), X7
	LONG  $0x3845e3c4; WORD $0x01ff // VINSERTI128 ymm7, ymm7, xmm7, 1

	// Data pointers and block count.
	MOVQ  in+48(FP), SI
	MOVQ  out+72(FP), DX
	MOVQ  in_len+56(FP), R9
	SHRQ  $5, R9                   // R9 = len(in) / 32
	TESTQ R9, R9
	JZ    avx2_done                // fewer than 32 input bytes: nothing to do

avx2_loop:
	LONG $0x066ffec5               // VMOVDQU ymm0, [rsi]
	LONG $0xd073f5c5; BYTE $0x04   // VPSRLQ  ymm1, ymm0, 4

	// Low-nibble path.
	LONG $0xdb7dc1c4; BYTE $0xc0   // VPAND   ymm0, ymm0, ymm8   ; low nibbles
	LONG $0x004de2c4; BYTE $0xd0   // VPSHUFB ymm2, ymm6, ymm0   ; low[low nibble]

	// High-nibble path.
	LONG $0xdb75c1c4; BYTE $0xc8   // VPAND   ymm1, ymm1, ymm8   ; high nibbles
	LONG $0x0045e2c4; BYTE $0xd9   // VPSHUFB ymm3, ymm7, ymm1   ; high[high nibble]

	// Combine and store.
	LONG $0xe3efedc5               // VPXOR   ymm4, ymm2, ymm3
	LONG $0x227ffec5               // VMOVDQU [rdx], ymm4

	ADDQ $32, SI
	ADDQ $32, DX
	DECQ R9
	JNZ  avx2_loop

avx2_done:
	// VZEROUPPER: runs on the early-exit path too, since the table setup
	// above has already written the upper halves of ymm6/ymm7/ymm8.
	BYTE $0xc5; BYTE $0xf8; BYTE $0x77
	RET
|