12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730 |
- #include "textflag.h"
// emitLiteral writes a literal header describing len(lit) at dst_base, copies
// the lit_len bytes found at lit_base right behind that header, and returns
// the header size plus len(lit).
//
// Argument layout (offsets from FP): dst_base+0, lit_base+24, lit_len+32,
// ret+48. NOTE(review): this looks like two Go slice headers followed by an
// int result, i.e. func emitLiteral(dst, lit []byte) int - confirm against
// the Go declaration.
//
// Register roles:
//	AX  = len(lit)
//	BX  = n = len(lit) - 1 (32-bit)
//	DX  = return value
//	DI  = write pointer into dst
//	R10 = &lit[0]
//
// NOTE(review): the comparisons on n are signed and the three-byte form keeps
// only the low 16 bits of n, so this presumably expects 1 <= len(lit) <= 65536
// - confirm with the callers.
- TEXT ·emitLiteral(SB), NOSPLIT, $24-56
- MOVQ dst_base+0(FP), DI
- MOVQ lit_base+24(FP), R10
- MOVQ lit_len+32(FP), AX
- MOVQ AX, DX // return value starts at len(lit); the header size is added below
- MOVL AX, BX
- SUBL $1, BX // BX = n = len(lit) - 1
- CMPL BX, $60 // n < 60: n is stored inside the header byte itself
- JLT oneByte
- CMPL BX, $256 // n < 256: n is stored in one extra byte
- JLT twoBytes
- threeBytes:
- MOVB $0xf4, 0(DI) // 0xf4 == 61<<2
- MOVW BX, 1(DI) // low 16 bits of n
- ADDQ $3, DI
- ADDQ $3, DX
- JMP memmove
- twoBytes:
- MOVB $0xf0, 0(DI) // 0xf0 == 60<<2
- MOVB BX, 1(DI) // n as a single byte
- ADDQ $2, DI
- ADDQ $2, DX
- JMP memmove
- oneByte:
- SHLB $2, BX // header byte = n<<2, low two bits zero
- MOVB BX, 0(DI)
- ADDQ $1, DI
- ADDQ $1, DX
- memmove:
- MOVQ DX, ret+48(FP) // result is stored before the call, so DX need not survive it
- // Copy the literal bytes behind the header by calling
- // runtime·memmove(DI, R10, AX), i.e. (destination, source, byte count).
- // The three arguments go into the 24-byte local frame at 0(SP), 8(SP)
- // and 16(SP).
- MOVQ DI, 0(SP)
- MOVQ R10, 8(SP)
- MOVQ AX, 16(SP)
- CALL runtime·memmove(SB)
- RET
// emitCopy writes one or more copy elements describing (offset, length) at
// dst_base and returns the number of bytes written.
//
// Argument layout (offsets from FP): dst_base+0, offset+24, length+32, ret+40.
// NOTE(review): presumably func emitCopy(dst []byte, offset, length int) int -
// confirm against the Go declaration.
//
// Register roles:
//	AX  = remaining length
//	DI  = write pointer into dst
//	SI  = &dst[0], kept only to compute the return value DI - SI
//	R11 = offset
//
// NOTE(review): the 3-byte forms store only the low 16 bits of the offset, so
// offset is presumably always below 65536 - confirm with the callers.
- TEXT ·emitCopy(SB), NOSPLIT, $0-48
- MOVQ dst_base+0(FP), DI
- MOVQ DI, SI
- MOVQ offset+24(FP), R11
- MOVQ length+32(FP), AX
- loop0:
- // While length >= 68, peel off chunks of 64. Stopping at 68 instead of 64 leaves a remainder of at least 4.
- CMPL AX, $68
- JLT step1
- // Emit a length-64 element in the 3-byte form: 0xfe == (64-1)<<2 | 2, then the 16-bit offset.
- MOVB $0xfe, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- SUBL $64, AX
- JMP loop0
- step1:
- // If length > 64 (that is 65..67 here), emit 60 first so that 5..7 remain.
- CMPL AX, $64
- JLE step2
- // Emit a length-60 element in the 3-byte form: 0xee == (60-1)<<2 | 2, then the 16-bit offset.
- MOVB $0xee, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- SUBL $60, AX
- step2:
- // The 2-byte form is used only when length < 12 and offset < 2048; otherwise go to step3.
- CMPL AX, $12
- JGE step3
- CMPL R11, $2048
- JGE step3
- // 2-byte form: byte 1 = low 8 bits of offset; byte 0 = (offset>>8)<<5 | (length-4)<<2 | 1. Byte 1 is stored first because R11 is shifted afterwards.
- MOVB R11, 1(DI)
- SHRL $8, R11
- SHLB $5, R11
- SUBB $4, AX
- SHLB $2, AX
- ORB AX, R11
- ORB $1, R11
- MOVB R11, 0(DI)
- ADDQ $2, DI
- // Return the number of bytes written: DI - SI.
- SUBQ SI, DI
- MOVQ DI, ret+40(FP)
- RET
- step3:
- // 3-byte form: byte 0 = (length-1)<<2 | 2, bytes 1..2 = low 16 bits of offset.
- SUBL $1, AX
- SHLB $2, AX
- ORB $2, AX
- MOVB AX, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- // Return the number of bytes written: DI - SI.
- SUBQ SI, DI
- MOVQ DI, ret+40(FP)
- RET
// extendMatch advances j while src[i] == src[j] and j < len(src), stepping i
// and j together, and returns the final j.
//
// Argument layout (offsets from FP): src_base+0, src_len+8, i+24, j+32,
// ret+40. NOTE(review): presumably func extendMatch(src []byte, i, j int) int
// - confirm against the Go declaration.
//
// Register roles (all pointers once the ADDQs below have run):
//	DX  = &src[0]
//	R13 = &src[len(src)-8], last position where an 8-byte load at SI fits
//	R14 = &src[len(src)]
//	R15 = &src[i]
//	SI  = &src[j]
//
// NOTE(review): the 8-byte load at R15 is only bounded through the check on
// SI, so this presumably relies on i < j - confirm with the callers.
- TEXT ·extendMatch(SB), NOSPLIT, $0-48
- MOVQ src_base+0(FP), DX
- MOVQ src_len+8(FP), R14
- MOVQ i+24(FP), R15
- MOVQ j+32(FP), SI
- ADDQ DX, R14
- ADDQ DX, R15
- ADDQ DX, SI
- MOVQ R14, R13
- SUBQ $8, R13
- cmp8:
- // While at least 8 bytes remain at SI (SI <= R13, unsigned), compare 8 bytes
- // at a time and advance both pointers by 8 when they are all equal.
- CMPQ SI, R13
- JA cmp1
- MOVQ (R15), AX
- MOVQ (SI), BX
- CMPQ AX, BX
- JNE bsf
- ADDQ $8, R15
- ADDQ $8, SI
- JMP cmp8
- bsf:
- // The two 8-byte words differ. XOR leaves a 1 bit wherever they disagree and
- // BSFQ yields the index of the lowest such bit. The loads are little-endian,
- // so that bit lies in the first differing byte; shifting right by 3 turns
- // the bit index into the count of equal leading bytes, which is added to SI.
- XORQ AX, BX
- BSFQ BX, BX
- SHRQ $3, BX
- ADDQ BX, SI
- // Convert the pointer SI back into an index into src and return it.
- SUBQ DX, SI
- MOVQ SI, ret+40(FP)
- RET
- cmp1:
- // Fewer than 8 bytes remain: compare one byte at a time until the end of src or a mismatch.
- CMPQ SI, R14
- JAE extendMatchEnd
- MOVB (R15), AX
- MOVB (SI), BX
- CMPB AX, BX
- JNE extendMatchEnd
- ADDQ $1, R15
- ADDQ $1, SI
- JMP cmp1
- extendMatchEnd:
- // Convert the pointer SI back into an index into src and return it.
- SUBQ DX, SI
- MOVQ SI, ret+40(FP)
- RET
// encodeBlock scans src for repeated 4-byte sequences using a hash table of
// 16-bit positions, writes literal and copy elements to dst, and stores the
// number of bytes written in d+48(FP).
//
// Argument layout (offsets from FP): dst_base+0, src_base+24, src_len+32,
// d+48. NOTE(review): presumably func encodeBlock(dst, src []byte) (d int) -
// confirm against the Go declaration.
//
// Register roles in the main loops:
//	AX, BX = scratch
//	CX  = shift applied to the 32-bit hash product
//	DX  = &src[0] (during set-up: the table size)
//	DI  = write pointer into dst
//	SI  = &src[s], the current position
//	R9  = limit = len(src) - 15, compared against offsets into src
//	R10 = &src[nextEmit], start of the bytes not yet emitted
//	R11 = hash value; reused as the copy offset from inner1 onwards
//	R12 = skip; reused as the match start pointer from inner1 onwards
//	R13 = &src[nextS]; reused as &src[len(src)-8] from inner1 onwards
//	R14 = scratch (src length, step size, end pointer, loaded 8 bytes)
//	R15 = candidate index into src; a pointer while a match is extended
//
// Stack frame (32888 bytes):
//	0(SP)..55(SP)    outgoing arguments and result for memmove / emitLiteral
//	56(SP)           spill of CX
//	64(SP)           spill of DX
//	72(SP)           spill of SI
//	80(SP)           spill of DI
//	88(SP)           spill of R9
//	112(SP)          spill of R15
//	table-32768(SP)  32768-byte hash table of 16-bit entries
//
// The hand-encoded BYTE sequences address the table as 0x78(SP)(R11*2).
// NOTE(review): that displacement (120 == 32888 - 32768) is assumed to be the
// same location as table-32768(SP); re-check it if the frame size changes.
//
// NOTE(review): table entries hold only the low 16 bits of a position, so src
// is presumably limited to 65536 bytes - confirm with the caller.
- TEXT ·encodeBlock(SB), 0, $32888-56
- MOVQ dst_base+0(FP), DI
- MOVQ src_base+24(FP), SI
- MOVQ src_len+32(FP), R14
- // Start with shift = 32 - 8 and tableSize = 1 << 8.
- MOVQ $24, CX
- MOVQ $256, DX
- calcShift:
- // Double tableSize and decrement shift until tableSize reaches 16384 or
- // tableSize >= len(src). A 32-bit value shifted right by CX is then always
- // below tableSize.
- CMPQ DX, $16384
- JGE varTable
- CMPQ DX, R14
- JGE varTable
- SUBQ $1, CX
- SHLQ $1, DX
- JMP calcShift
- varTable:
- // Zero only the first tableSize entries of the table rather than all of it.
- // Each entry is 2 bytes and each MOVOU stores 16 bytes, so tableSize/8
- // stores are enough (DX is shifted right by 3 to get that count).
- //
- // BX walks the table; X0 is the all-zero 16-byte value.
- //
- SHRQ $3, DX
- LEAQ table-32768(SP), BX
- PXOR X0, X0
- memclr:
- MOVOU X0, 0(BX)
- ADDQ $16, BX
- SUBQ $1, DX
- JNZ memclr
- // From here on DX = &src[0].
- MOVQ SI, DX
- // R9 = limit = len(src) - 15.
- MOVQ R14, R9
- SUBQ $15, R9
- // Spill CX, DX and R9 up front; they are not written again by this routine
- // except when reloaded from these slots after the memmove call.
- MOVQ CX, 56(SP)
- MOVQ DX, 64(SP)
- MOVQ R9, 88(SP)
- // nextEmit = 0, kept as the pointer &src[0].
- MOVQ DX, R10
- // s = 1.
- ADDQ $1, SI
- // nextHash = (load32(src, s) * 0x1e35a7bd) >> shift.
- MOVL 0(SI), R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
- outer:
- // Start of a new search for a 4-byte match.
- // skip = 32.
- MOVQ $32, R12
- // nextS = s.
- MOVQ SI, R13
- // candidate = 0.
- MOVQ $0, R15
- inner0:
- // Probe loop: step forward until the 4 bytes at s equal the 4 bytes at the candidate.
- // s = nextS.
- MOVQ R13, SI
- // step = skip >> 5, so the stride grows by one for every 32 added to skip.
- MOVQ R12, R14
- SHRQ $5, R14
- // nextS = s + step.
- ADDQ R14, R13
- // skip += step.
- ADDQ R14, R12
- // If nextS (as an offset into src) is above the limit, stop searching.
- MOVQ R13, AX
- SUBQ DX, AX
- CMPQ AX, R9
- JA emitRemainder
- // candidate = table[nextHash], zero-extended to 64 bits.
- // Hand-encoded instruction: 4e 0f b7 7c 5c 78 is
- // movzwq 0x78(%rsp,%r11,2), %r15
- BYTE $0x4e
- BYTE $0x0f
- BYTE $0xb7
- BYTE $0x7c
- BYTE $0x5c
- BYTE $0x78
- // AX = s as an offset into src.
- MOVQ SI, AX
- SUBQ DX, AX
- // table[nextHash] = low 16 bits of s.
- // Hand-encoded instruction: 66 42 89 44 5c 78 is mov %ax, 0x78(%rsp,%r11,2)
- BYTE $0x66
- BYTE $0x42
- BYTE $0x89
- BYTE $0x44
- BYTE $0x5c
- BYTE $0x78
- // nextHash = (load32(src, nextS) * 0x1e35a7bd) >> shift.
- MOVL 0(R13), R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
- // Keep probing unless load32(src, s) == load32(src, candidate).
- MOVL 0(SI), AX
- MOVL (DX)(R15*1), BX
- CMPL AX, BX
- JNE inner0
- fourByteMatch:
- // A 4-byte match was found at s. First emit the bytes in
- // src[nextEmit:s] as a literal, then fall into inner1 to extend and emit
- // the match itself.
- //
- // AX = s - nextEmit. Literals of at most 16 bytes take the fast path below.
- MOVQ SI, AX
- SUBQ R10, AX
- CMPQ AX, $16
- JLE emitLiteralFastPath
- // ----------------------------------------
- // Inlined copy of the emitLiteral header logic for a literal of AX bytes:
- // BX = n = AX - 1, written as n<<2, or 0xf0 plus one byte, or 0xf4 plus
- // two bytes, depending on whether n < 60, n < 256, or neither.
- MOVL AX, BX
- SUBL $1, BX
- CMPL BX, $60
- JLT inlineEmitLiteralOneByte
- CMPL BX, $256
- JLT inlineEmitLiteralTwoBytes
- inlineEmitLiteralThreeBytes:
- MOVB $0xf4, 0(DI)
- MOVW BX, 1(DI)
- ADDQ $3, DI
- JMP inlineEmitLiteralMemmove
- inlineEmitLiteralTwoBytes:
- MOVB $0xf0, 0(DI)
- MOVB BX, 1(DI)
- ADDQ $2, DI
- JMP inlineEmitLiteralMemmove
- inlineEmitLiteralOneByte:
- SHLB $2, BX
- MOVB BX, 0(DI)
- ADDQ $1, DI
- inlineEmitLiteralMemmove:
- // Copy the literal bytes with runtime·memmove(DI, R10, AX); the three
- // arguments go to 0(SP), 8(SP) and 16(SP).
- //
- // Registers still needed afterwards are spilled before the call and
- // reloaded after it. DI is advanced past the literal before it is spilled,
- // so the reloaded DI already points behind the copied bytes.
- MOVQ DI, 0(SP)
- MOVQ R10, 8(SP)
- MOVQ AX, 16(SP)
- ADDQ AX, DI
- MOVQ SI, 72(SP)
- MOVQ DI, 80(SP)
- MOVQ R15, 112(SP)
- CALL runtime·memmove(SB)
- MOVQ 56(SP), CX
- MOVQ 64(SP), DX
- MOVQ 72(SP), SI
- MOVQ 80(SP), DI
- MOVQ 88(SP), R9
- MOVQ 112(SP), R15
- JMP inner1
- inlineEmitLiteralEnd:
- // End of the inlined literal emission.
- // ----------------------------------------
- emitLiteralFastPath:
- // Literal of at most 16 bytes: one header byte (AX - 1) << 2.
- MOVB AX, BX
- SUBB $1, BX
- SHLB $2, BX
- MOVB BX, (DI)
- ADDQ $1, DI
- // Copy the literal as one unconditional 16-byte load and store, whatever
- // AX is. Only the first AX bytes count because DI advances by AX, and
- // later output overwrites the rest.
- //
- // NOTE(review): this reads 16 bytes at R10 and writes 16 bytes at DI even
- // for shorter literals. It presumably relies on the len(src) - 15 limit
- // for the read and on the caller sizing dst generously for the write -
- // confirm both.
- //
- MOVOU 0(R10), X0
- MOVOU X0, 0(DI)
- ADDQ AX, DI
- inner1:
- // Match loop: extend the match at s, emit it as copy elements, then test
- // whether another match starts right behind it.
- MOVQ SI, R12
- // R11 = offset = s - candidate (R12 and DX are pointers, R15 is an index).
- MOVQ R12, R11
- SUBQ R15, R11
- SUBQ DX, R11
- // ----------------------------------------
- // Inlined copy of the extendMatch logic, starting 4 bytes into the match
- // because those 4 bytes are already known to be equal.
- //
- // R14 = &src[len(src)].
- MOVQ src_len+32(FP), R14
- ADDQ DX, R14
- // R13 = &src[len(src) - 8].
- MOVQ R14, R13
- SUBQ $8, R13
- // R15 = &src[candidate + 4]; R15 is a pointer from here on.
- ADDQ $4, R15
- ADDQ DX, R15
- // s += 4.
- ADDQ $4, SI
- inlineExtendMatchCmp8:
- // While at least 8 bytes remain at SI (SI <= R13, unsigned), compare 8 bytes
- // at a time and advance both pointers by 8 when they are all equal.
- CMPQ SI, R13
- JA inlineExtendMatchCmp1
- MOVQ (R15), AX
- MOVQ (SI), BX
- CMPQ AX, BX
- JNE inlineExtendMatchBSF
- ADDQ $8, R15
- ADDQ $8, SI
- JMP inlineExtendMatchCmp8
- inlineExtendMatchBSF:
- // The two 8-byte words differ. XOR marks the differing bits, BSFQ finds
- // the lowest one, and shifting right by 3 turns that bit index into the
- // number of equal leading bytes (the loads are little-endian). Add that
- // count to SI.
- XORQ AX, BX
- BSFQ BX, BX
- SHRQ $3, BX
- ADDQ BX, SI
- JMP inlineExtendMatchEnd
- inlineExtendMatchCmp1:
- // Fewer than 8 bytes remain: compare one byte at a time until the end of src or a mismatch.
- CMPQ SI, R14
- JAE inlineExtendMatchEnd
- MOVB (R15), AX
- MOVB (SI), BX
- CMPB AX, BX
- JNE inlineExtendMatchEnd
- ADDQ $1, R15
- ADDQ $1, SI
- JMP inlineExtendMatchCmp1
- inlineExtendMatchEnd:
- // End of the inlined match extension. SI now points just past the match.
- // ----------------------------------------
- //
- // ----------------------------------------
- // Inlined copy of the emitCopy logic, with R11 = offset and
- // AX = length = s - base.
- //
- MOVQ SI, AX
- SUBQ R12, AX
- inlineEmitCopyLoop0:
- // While length >= 68, peel off chunks of 64.
- CMPL AX, $68
- JLT inlineEmitCopyStep1
- // Emit a length-64 element in the 3-byte form: 0xfe == (64-1)<<2 | 2, then the 16-bit offset.
- MOVB $0xfe, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- SUBL $64, AX
- JMP inlineEmitCopyLoop0
- inlineEmitCopyStep1:
- // If length > 64 (that is 65..67 here), emit 60 first so that 5..7 remain.
- CMPL AX, $64
- JLE inlineEmitCopyStep2
- // Emit a length-60 element in the 3-byte form: 0xee == (60-1)<<2 | 2, then the 16-bit offset.
- MOVB $0xee, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- SUBL $60, AX
- inlineEmitCopyStep2:
- // The 2-byte form is used only when length < 12 and offset < 2048.
- CMPL AX, $12
- JGE inlineEmitCopyStep3
- CMPL R11, $2048
- JGE inlineEmitCopyStep3
- // 2-byte form: byte 1 = low 8 bits of offset; byte 0 = (offset>>8)<<5 | (length-4)<<2 | 1.
- MOVB R11, 1(DI)
- SHRL $8, R11
- SHLB $5, R11
- SUBB $4, AX
- SHLB $2, AX
- ORB AX, R11
- ORB $1, R11
- MOVB R11, 0(DI)
- ADDQ $2, DI
- JMP inlineEmitCopyEnd
- inlineEmitCopyStep3:
- // 3-byte form: byte 0 = (length-1)<<2 | 2, bytes 1..2 = low 16 bits of offset.
- SUBL $1, AX
- SHLB $2, AX
- ORB $2, AX
- MOVB AX, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- inlineEmitCopyEnd:
- // End of the inlined copy emission.
- // ----------------------------------------
- // nextEmit = s.
- MOVQ SI, R10
- // If s (as an offset into src) has reached the limit, stop searching.
- MOVQ SI, AX
- SUBQ DX, AX
- CMPQ AX, R9
- JAE emitRemainder
- // Update the table for positions s-1 and s, and test for an immediate
- // match at s. One 8-byte load supplies the 4-byte windows at s-1, s and
- // s+1 through successive 8-bit right shifts.
- // x = load64(src, s-1).
- MOVQ -1(SI), R14
- // prevHash = (uint32(x) * 0x1e35a7bd) >> shift.
- MOVL R14, R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
- // AX = s - 1 as an offset into src.
- MOVQ SI, AX
- SUBQ DX, AX
- SUBQ $1, AX
- // table[prevHash] = low 16 bits of s - 1.
- // Hand-encoded instruction: 66 42 89 44 5c 78 is mov %ax, 0x78(%rsp,%r11,2)
- BYTE $0x66
- BYTE $0x42
- BYTE $0x89
- BYTE $0x44
- BYTE $0x5c
- BYTE $0x78
- // currHash = (uint32(x>>8) * 0x1e35a7bd) >> shift.
- SHRQ $8, R14
- MOVL R14, R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
- // candidate = table[currHash], zero-extended to 64 bits.
- // Hand-encoded instruction: 4e 0f b7 7c 5c 78 is
- // movzwq 0x78(%rsp,%r11,2), %r15
- BYTE $0x4e
- BYTE $0x0f
- BYTE $0xb7
- BYTE $0x7c
- BYTE $0x5c
- BYTE $0x78
- // AX = s as an offset into src.
- ADDQ $1, AX
- // table[currHash] = low 16 bits of s.
- // Hand-encoded instruction: 66 42 89 44 5c 78 is mov %ax, 0x78(%rsp,%r11,2)
- BYTE $0x66
- BYTE $0x42
- BYTE $0x89
- BYTE $0x44
- BYTE $0x5c
- BYTE $0x78
- // If the 4 bytes at s equal the 4 bytes at the candidate, handle that match right away.
- MOVL (DX)(R15*1), BX
- CMPL R14, BX
- JEQ inner1
- // nextHash = (uint32(x>>16) * 0x1e35a7bd) >> shift, the hash of the 4 bytes at s+1.
- SHRQ $8, R14
- MOVL R14, R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
- // s++.
- ADDQ $1, SI
- // No immediate match: go back to the probe search.
- JMP outer
- emitRemainder:
- // If nextEmit has not reached len(src), emit src[nextEmit:] as one final literal.
- MOVQ src_len+32(FP), AX
- ADDQ DX, AX
- CMPQ R10, AX
- JEQ encodeBlockEnd
- // Call emitLiteral with its arguments at 0(SP)..47(SP) and its result at
- // 48(SP). The slots at 8(SP), 16(SP) and 40(SP) are filled in although
- // emitLiteral reads only dst_base, lit_base and lit_len.
- MOVQ DI, 0(SP)
- MOVQ $0, 8(SP)
- MOVQ $0, 16(SP)
- MOVQ R10, 24(SP)
- SUBQ R10, AX
- MOVQ AX, 32(SP)
- MOVQ AX, 40(SP)
- // DI is the only register still needed after the call, so it alone is spilled and reloaded.
- MOVQ DI, 80(SP)
- CALL ·emitLiteral(SB)
- MOVQ 80(SP), DI
- // Advance DI by the byte count emitLiteral returned.
- ADDQ 48(SP), DI
- encodeBlockEnd:
- MOVQ dst_base+0(FP), AX
- SUBQ AX, DI // d = DI - &dst[0], the number of bytes written
- MOVQ DI, d+48(FP)
- RET
|