
Merge pull request #496 from fatedier/0.14

xtcp for p2p communication
fatedier 7 years ago
parent commit 673047be2c
67 changed files with 7981 additions and 34 deletions
  1. Godeps/Godeps.json (+27 -3)
  2. client/control.go (+2 -1)
  3. client/proxy.go (+110 -6)
  4. client/vistor.go (+173 -0)
  5. conf/frpc_full.ini (+18 -0)
  6. conf/frps_full.ini (+3 -0)
  7. models/config/client_common.go (+2 -0)
  8. models/config/proxy.go (+90 -0)
  9. models/config/server_common.go (+10 -0)
  10. models/consts/consts.go (+1 -0)
  11. models/msg/msg.go (+33 -3)
  12. server/control.go (+4 -3)
  13. server/nathole.go (+182 -0)
  14. server/proxy.go (+53 -0)
  15. server/service.go (+36 -16)
  16. utils/net/kcp.go (+5 -1)
  17. vendor/github.com/fatedier/beego/logs/console.go (+2 -1)
  18. vendor/github.com/fatedier/kcp-go/.gitignore (+24 -0)
  19. vendor/github.com/fatedier/kcp-go/.travis.yml (+15 -0)
  20. vendor/github.com/fatedier/kcp-go/LICENSE (+22 -0)
  21. vendor/github.com/fatedier/kcp-go/README.md (+172 -0)
  22. vendor/github.com/fatedier/kcp-go/crypt.go (+288 -0)
  23. vendor/github.com/fatedier/kcp-go/donate.png (BIN)
  24. vendor/github.com/fatedier/kcp-go/fec.go (+303 -0)
  25. vendor/github.com/fatedier/kcp-go/frame.png (BIN)
  26. vendor/github.com/fatedier/kcp-go/kcp-go.png (BIN)
  27. vendor/github.com/fatedier/kcp-go/kcp.go (+998 -0)
  28. vendor/github.com/fatedier/kcp-go/sess.go (+975 -0)
  29. vendor/github.com/fatedier/kcp-go/snmp.go (+164 -0)
  30. vendor/github.com/fatedier/kcp-go/updater.go (+105 -0)
  31. vendor/github.com/fatedier/kcp-go/xor.go (+110 -0)
  32. vendor/github.com/templexxx/cpufeat/.gitignore (+14 -0)
  33. vendor/github.com/templexxx/cpufeat/LICENSE (+27 -0)
  34. vendor/github.com/templexxx/cpufeat/cpu.go (+32 -0)
  35. vendor/github.com/templexxx/cpufeat/cpu_arm.go (+7 -0)
  36. vendor/github.com/templexxx/cpufeat/cpu_arm64.go (+7 -0)
  37. vendor/github.com/templexxx/cpufeat/cpu_mips.go (+7 -0)
  38. vendor/github.com/templexxx/cpufeat/cpu_mips64.go (+7 -0)
  39. vendor/github.com/templexxx/cpufeat/cpu_mips64le.go (+7 -0)
  40. vendor/github.com/templexxx/cpufeat/cpu_mipsle.go (+7 -0)
  41. vendor/github.com/templexxx/cpufeat/cpu_ppc64.go (+7 -0)
  42. vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go (+7 -0)
  43. vendor/github.com/templexxx/cpufeat/cpu_s390x.go (+7 -0)
  44. vendor/github.com/templexxx/cpufeat/cpu_x86.go (+59 -0)
  45. vendor/github.com/templexxx/cpufeat/cpu_x86.s (+32 -0)
  46. vendor/github.com/templexxx/reedsolomon/.gitignore (+40 -0)
  47. vendor/github.com/templexxx/reedsolomon/.travis.yml (+9 -0)
  48. vendor/github.com/templexxx/reedsolomon/LICENSE (+23 -0)
  49. vendor/github.com/templexxx/reedsolomon/README.md (+109 -0)
  50. vendor/github.com/templexxx/reedsolomon/matrix.go (+156 -0)
  51. vendor/github.com/templexxx/reedsolomon/rs.go (+280 -0)
  52. vendor/github.com/templexxx/reedsolomon/rs_amd64.go (+868 -0)
  53. vendor/github.com/templexxx/reedsolomon/rs_amd64.s (+401 -0)
  54. vendor/github.com/templexxx/reedsolomon/rs_other.go (+8 -0)
  55. vendor/github.com/templexxx/reedsolomon/tbl.go (+37 -0)
  56. vendor/github.com/templexxx/xor/.gitattributes (+1 -0)
  57. vendor/github.com/templexxx/xor/.gitignore (+18 -0)
  58. vendor/github.com/templexxx/xor/LICENSE (+21 -0)
  59. vendor/github.com/templexxx/xor/README.md (+48 -0)
  60. vendor/github.com/templexxx/xor/avx2_amd64.s (+438 -0)
  61. vendor/github.com/templexxx/xor/nosimd.go (+116 -0)
  62. vendor/github.com/templexxx/xor/sse2_amd64.s (+574 -0)
  63. vendor/github.com/templexxx/xor/xor.go (+49 -0)
  64. vendor/github.com/templexxx/xor/xor_amd64.go (+120 -0)
  65. vendor/github.com/templexxx/xor/xor_other.go (+19 -0)
  66. vendor/github.com/tjfoc/gmsm/LICENSE (+201 -0)
  67. vendor/github.com/tjfoc/gmsm/sm4/sm4.go (+291 -0)

+ 27 - 3
Godeps/Godeps.json

@@ -1,6 +1,6 @@
 {
 	"ImportPath": "github.com/fatedier/frp",
-	"GoVersion": "go1.8",
+	"GoVersion": "go1.9",
 	"GodepVersion": "v79",
 	"Packages": [
 		"./..."
@@ -22,8 +22,13 @@
 		},
 		{
 			"ImportPath": "github.com/fatedier/beego/logs",
-			"Comment": "v1.7.2-72-gf73c369",
-			"Rev": "f73c3692bbd70a83728cb59b2c0423ff95e4ecea"
+			"Comment": "v1.7.2-73-g6c6a4f5",
+			"Rev": "6c6a4f5bd5eb5a39f7e289b8f345b55f75e7e3e8"
+		},
+		{
+			"ImportPath": "github.com/fatedier/kcp-go",
+			"Comment": "v3.15-35-gcd167d2",
+			"Rev": "cd167d2f15f451b0f33780ce862fca97adc0331e"
 		},
 		{
 			"ImportPath": "github.com/golang/snappy",
@@ -64,6 +69,25 @@
 			"Comment": "v1.1.4-25-g2402e8e",
 			"Rev": "2402e8e7a02fc811447d11f881aa9746cdc57983"
 		},
+		{
+			"ImportPath": "github.com/templexxx/cpufeat",
+			"Rev": "3794dfbfb04749f896b521032f69383f24c3687e"
+		},
+		{
+			"ImportPath": "github.com/templexxx/reedsolomon",
+			"Comment": "0.1.1-4-g7092926",
+			"Rev": "7092926d7d05c415fabb892b1464a03f8228ab80"
+		},
+		{
+			"ImportPath": "github.com/templexxx/xor",
+			"Comment": "0.1.2",
+			"Rev": "0af8e873c554da75f37f2049cdffda804533d44c"
+		},
+		{
+			"ImportPath": "github.com/tjfoc/gmsm/sm4",
+			"Comment": "v1.0-42-g21d76de",
+			"Rev": "21d76dee237dbbc8dfe1510000b9bf2733635aa1"
+		},
 		{
 			"ImportPath": "github.com/vaughan0/go-ini",
 			"Rev": "a98ad7ee00ec53921f08832bc06ecf7fd600e6a1"

+ 2 - 1
client/control.go

@@ -271,9 +271,10 @@ func (ctl *Control) login() (err error) {
 	ctl.conn = conn
 	// update runId got from server
 	ctl.setRunId(loginRespMsg.RunId)
+	config.ClientCommonCfg.ServerUdpPort = loginRespMsg.ServerUdpPort
 	ctl.ClearLogPrefix()
 	ctl.AddLogPrefix(loginRespMsg.RunId)
-	ctl.Info("login to server success, get run id [%s]", loginRespMsg.RunId)
+	ctl.Info("login to server success, get run id [%s], server udp port [%d]", loginRespMsg.RunId, loginRespMsg.ServerUdpPort)
 
 	// login success, so we let closedCh available again
 	ctl.closedCh = make(chan int)

+ 110 - 6
client/proxy.go

@@ -15,6 +15,7 @@
 package client
 
 import (
+	"bytes"
 	"fmt"
 	"io"
 	"net"
@@ -29,6 +30,7 @@ import (
 	frpIo "github.com/fatedier/frp/utils/io"
 	"github.com/fatedier/frp/utils/log"
 	frpNet "github.com/fatedier/frp/utils/net"
+	"github.com/fatedier/frp/utils/pool"
 )
 
 // Proxy defines how to deal with work connections for different proxy type.
@@ -72,6 +74,11 @@ func NewProxy(ctl *Control, pxyConf config.ProxyConf) (pxy Proxy) {
 			BaseProxy: baseProxy,
 			cfg:       cfg,
 		}
+	case *config.XtcpProxyConf:
+		pxy = &XtcpProxy{
+			BaseProxy: baseProxy,
+			cfg:       cfg,
+		}
 	}
 	return
 }
@@ -108,7 +115,8 @@ func (pxy *TcpProxy) Close() {
 }
 
 func (pxy *TcpProxy) InWorkConn(conn frpNet.Conn) {
-	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf, conn)
+	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf, conn,
+		[]byte(config.ClientCommonCfg.PrivilegeToken))
 }
 
 // HTTP
@@ -136,7 +144,8 @@ func (pxy *HttpProxy) Close() {
 }
 
 func (pxy *HttpProxy) InWorkConn(conn frpNet.Conn) {
-	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf, conn)
+	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf, conn,
+		[]byte(config.ClientCommonCfg.PrivilegeToken))
 }
 
 // HTTPS
@@ -164,7 +173,8 @@ func (pxy *HttpsProxy) Close() {
 }
 
 func (pxy *HttpsProxy) InWorkConn(conn frpNet.Conn) {
-	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf, conn)
+	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf, conn,
+		[]byte(config.ClientCommonCfg.PrivilegeToken))
 }
 
 // STCP
@@ -192,7 +202,101 @@ func (pxy *StcpProxy) Close() {
 }
 
 func (pxy *StcpProxy) InWorkConn(conn frpNet.Conn) {
-	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf, conn)
+	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf, conn,
+		[]byte(config.ClientCommonCfg.PrivilegeToken))
+}
+
+// XTCP
+type XtcpProxy struct {
+	BaseProxy
+
+	cfg         *config.XtcpProxyConf
+	proxyPlugin plugin.Plugin
+}
+
+func (pxy *XtcpProxy) Run() (err error) {
+	if pxy.cfg.Plugin != "" {
+		pxy.proxyPlugin, err = plugin.Create(pxy.cfg.Plugin, pxy.cfg.PluginParams)
+		if err != nil {
+			return
+		}
+	}
+	return
+}
+
+func (pxy *XtcpProxy) Close() {
+	if pxy.proxyPlugin != nil {
+		pxy.proxyPlugin.Close()
+	}
+}
+
+func (pxy *XtcpProxy) InWorkConn(conn frpNet.Conn) {
+	defer conn.Close()
+	var natHoleSidMsg msg.NatHoleSid
+	err := msg.ReadMsgInto(conn, &natHoleSidMsg)
+	if err != nil {
+		pxy.Error("xtcp read from workConn error: %v", err)
+		return
+	}
+
+	natHoleClientMsg := &msg.NatHoleClient{
+		ProxyName: pxy.cfg.ProxyName,
+		Sid:       natHoleSidMsg.Sid,
+	}
+	raddr, err := net.ResolveUDPAddr("udp",
+		fmt.Sprintf("%s:%d", config.ClientCommonCfg.ServerAddr, config.ClientCommonCfg.ServerUdpPort))
+	if err != nil {
+		pxy.Error("resolve server udp addr error: %v", err)
+		return
+	}
+	clientConn, err := net.DialUDP("udp", nil, raddr)
+	if err != nil {
+		pxy.Error("dial server udp addr error: %v", err)
+		return
+	}
+	defer clientConn.Close()
+
+	err = msg.WriteMsg(clientConn, natHoleClientMsg)
+	if err != nil {
+		pxy.Error("send natHoleClientMsg to server error: %v", err)
+		return
+	}
+
+	// Wait for client address at most 5 seconds.
+	var natHoleRespMsg msg.NatHoleResp
+	clientConn.SetReadDeadline(time.Now().Add(5 * time.Second))
+
+	buf := pool.GetBuf(1024)
+	n, err := clientConn.Read(buf)
+	if err != nil {
+		pxy.Error("get natHoleRespMsg error: %v", err)
+		return
+	}
+	err = msg.ReadMsgInto(bytes.NewReader(buf[:n]), &natHoleRespMsg)
+	if err != nil {
+		pxy.Error("get natHoleRespMsg error: %v", err)
+		return
+	}
+	clientConn.SetReadDeadline(time.Time{})
+	clientConn.Close()
+	pxy.Trace("get natHoleRespMsg, sid [%s], client address [%s]", natHoleRespMsg.Sid, natHoleRespMsg.ClientAddr)
+
+	// Send sid to vistor udp address.
+	time.Sleep(time.Second)
+	laddr, _ := net.ResolveUDPAddr("udp", clientConn.LocalAddr().String())
+	daddr, err := net.ResolveUDPAddr("udp", natHoleRespMsg.VistorAddr)
+	if err != nil {
+		pxy.Error("resolve vistor udp address error: %v", err)
+		return
+	}
+
+	lConn, err := net.DialUDP("udp", laddr, daddr)
+	if err != nil {
+		pxy.Error("dial vistor udp address error: %v", err)
+		return
+	}
+	lConn.Write([]byte(natHoleRespMsg.Sid))
+
+	kcpConn, err := frpNet.NewKcpConnFromUdp(lConn, true, natHoleRespMsg.VistorAddr)
+	if err != nil {
+		pxy.Error("create kcp connection from udp connection error: %v", err)
+		return
+	}
+
+	HandleTcpWorkConnection(&pxy.cfg.LocalSvrConf, pxy.proxyPlugin, &pxy.cfg.BaseProxyConf,
+		frpNet.WrapConn(kcpConn), []byte(pxy.cfg.Sk))
 }
 
 // UDP
@@ -302,7 +406,7 @@ func (pxy *UdpProxy) InWorkConn(conn frpNet.Conn) {
 
 // Common handler for tcp work connections.
 func HandleTcpWorkConnection(localInfo *config.LocalSvrConf, proxyPlugin plugin.Plugin,
-	baseInfo *config.BaseProxyConf, workConn frpNet.Conn) {
+	baseInfo *config.BaseProxyConf, workConn frpNet.Conn, encKey []byte) {
 
 	var (
 		remote io.ReadWriteCloser
@@ -310,7 +414,7 @@ func HandleTcpWorkConnection(localInfo *config.LocalSvrConf, proxyPlugin plugin.
 	)
 	remote = workConn
 	if baseInfo.UseEncryption {
-		remote, err = frpIo.WithEncryption(remote, []byte(config.ClientCommonCfg.PrivilegeToken))
+		remote, err = frpIo.WithEncryption(remote, encKey)
 		if err != nil {
 			workConn.Error("create encryption stream error: %v", err)
 			return

+ 173 - 0
client/vistor.go

@@ -15,15 +15,23 @@
 package client
 
 import (
+	"bytes"
+	"fmt"
 	"io"
+	"net"
+	"strconv"
+	"strings"
 	"sync"
 	"time"
 
+	"golang.org/x/net/ipv4"
+
 	"github.com/fatedier/frp/models/config"
 	"github.com/fatedier/frp/models/msg"
 	frpIo "github.com/fatedier/frp/utils/io"
 	"github.com/fatedier/frp/utils/log"
 	frpNet "github.com/fatedier/frp/utils/net"
+	"github.com/fatedier/frp/utils/pool"
 	"github.com/fatedier/frp/utils/util"
 )
 
@@ -45,6 +53,11 @@ func NewVistor(ctl *Control, pxyConf config.ProxyConf) (vistor Vistor) {
 			BaseVistor: baseVistor,
 			cfg:        cfg,
 		}
+	case *config.XtcpProxyConf:
+		vistor = &XtcpVistor{
+			BaseVistor: baseVistor,
+			cfg:        cfg,
+		}
 	}
 	return
 }
@@ -143,3 +156,163 @@ func (sv *StcpVistor) handleConn(userConn frpNet.Conn) {
 
 	frpIo.Join(userConn, remote)
 }
+
+type XtcpVistor struct {
+	BaseVistor
+
+	cfg *config.XtcpProxyConf
+}
+
+func (sv *XtcpVistor) Run() (err error) {
+	sv.l, err = frpNet.ListenTcp(sv.cfg.BindAddr, int64(sv.cfg.BindPort))
+	if err != nil {
+		return
+	}
+
+	go sv.worker()
+	return
+}
+
+func (sv *XtcpVistor) Close() {
+	sv.l.Close()
+}
+
+func (sv *XtcpVistor) worker() {
+	for {
+		conn, err := sv.l.Accept()
+		if err != nil {
+			sv.Warn("stcp local listener closed")
+			return
+		}
+
+		go sv.handleConn(conn)
+	}
+}
+
+func (sv *XtcpVistor) handleConn(userConn frpNet.Conn) {
+	defer userConn.Close()
+
+	sv.Debug("get a new xtcp user connection")
+	if config.ClientCommonCfg.ServerUdpPort == 0 {
+		sv.Error("xtcp is not supported by server")
+		return
+	}
+
+	raddr, err := net.ResolveUDPAddr("udp",
+		fmt.Sprintf("%s:%d", config.ClientCommonCfg.ServerAddr, config.ClientCommonCfg.ServerUdpPort))
+	if err != nil {
+		sv.Error("resolve server udp addr error: %v", err)
+		return
+	}
+	vistorConn, err := net.DialUDP("udp", nil, raddr)
+	if err != nil {
+		sv.Error("dial server udp addr error: %v", err)
+		return
+	}
+	defer vistorConn.Close()
+
+	now := time.Now().Unix()
+	natHoleVistorMsg := &msg.NatHoleVistor{
+		ProxyName: sv.cfg.ServerName,
+		SignKey:   util.GetAuthKey(sv.cfg.Sk, now),
+		Timestamp: now,
+	}
+	err = msg.WriteMsg(vistorConn, natHoleVistorMsg)
+	if err != nil {
+		sv.Warn("send natHoleVistorMsg to server error: %v", err)
+		return
+	}
+
+	// Wait for client address at most 10 seconds.
+	var natHoleRespMsg msg.NatHoleResp
+	vistorConn.SetReadDeadline(time.Now().Add(10 * time.Second))
+	buf := pool.GetBuf(1024)
+	n, err := vistorConn.Read(buf)
+	if err != nil {
+		sv.Warn("get natHoleRespMsg error: %v", err)
+		return
+	}
+
+	err = msg.ReadMsgInto(bytes.NewReader(buf[:n]), &natHoleRespMsg)
+	if err != nil {
+		sv.Warn("get natHoleRespMsg error: %v", err)
+		return
+	}
+	vistorConn.SetReadDeadline(time.Time{})
+	pool.PutBuf(buf)
+
+	sv.Trace("get natHoleRespMsg, sid [%s], client address [%s]", natHoleRespMsg.Sid, natHoleRespMsg.ClientAddr)
+
+	// Close vistorConn so we can reuse its local address.
+	vistorConn.Close()
+
+	// Send detect message.
+	array := strings.Split(natHoleRespMsg.ClientAddr, ":")
+	if len(array) <= 1 {
+		sv.Error("get natHoleResp client address error: %s", natHoleRespMsg.ClientAddr)
+		return
+	}
+	laddr, _ := net.ResolveUDPAddr("udp", vistorConn.LocalAddr().String())
+	/*
+		for i := 1000; i < 65000; i++ {
+			sv.sendDetectMsg(array[0], int64(i), laddr, "a")
+		}
+	*/
+	port, err := strconv.ParseInt(array[1], 10, 64)
+	if err != nil {
+		sv.Error("get natHoleResp client address error: %s", natHoleRespMsg.ClientAddr)
+		return
+	}
+	sv.sendDetectMsg(array[0], int64(port), laddr, []byte(natHoleRespMsg.Sid))
+	sv.Trace("send all detect msg done")
+
+	// Listen on vistorConn's local address and wait for the client's connection.
+	lConn, err := net.ListenUDP("udp", laddr)
+	if err != nil {
+		sv.Error("listen on vistorConn's local address error: %v", err)
+		return
+	}
+	lConn.SetReadDeadline(time.Now().Add(5 * time.Second))
+	sidBuf := pool.GetBuf(1024)
+	n, _, err = lConn.ReadFromUDP(sidBuf)
+	if err != nil {
+		sv.Warn("get sid from client error: %v", err)
+		return
+	}
+	lConn.SetReadDeadline(time.Time{})
+	if string(sidBuf[:n]) != natHoleRespMsg.Sid {
+		sv.Warn("incorrect sid from client")
+		return
+	}
+	sv.Info("nat hole connection make success, sid [%s]", string(sidBuf[:n]))
+	pool.PutBuf(sidBuf)
+
+	var remote io.ReadWriteCloser
+	remote, err = frpNet.NewKcpConnFromUdp(lConn, false, natHoleRespMsg.ClientAddr)
+	if err != nil {
+		sv.Error("create kcp connection from udp connection error: %v", err)
+		return
+	}
+
+	if sv.cfg.UseEncryption {
+		remote, err = frpIo.WithEncryption(remote, []byte(sv.cfg.Sk))
+		if err != nil {
+			sv.Error("create encryption stream error: %v", err)
+			return
+		}
+	}
+
+	if sv.cfg.UseCompression {
+		remote = frpIo.WithCompression(remote)
+	}
+
+	frpIo.Join(userConn, remote)
+	sv.Debug("join connections closed")
+}
+
+func (sv *XtcpVistor) sendDetectMsg(addr string, port int64, laddr *net.UDPAddr, content []byte) (err error) {
+	daddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", addr, port))
+	if err != nil {
+		return err
+	}
+
+	tConn, err := net.DialUDP("udp", laddr, daddr)
+	if err != nil {
+		return err
+	}
+
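+	// The small TTL is deliberate: the probe only needs to open a mapping
+	// in the local NAT, it does not have to reach the peer.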
+	uConn := ipv4.NewConn(tConn)
+	uConn.SetTTL(3)
+
+	tConn.Write(content)
+	tConn.Close()
+	return nil
+}

+ 18 - 0
conf/frpc_full.ini

@@ -141,3 +141,21 @@ bind_addr = 127.0.0.1
 bind_port = 9000
 use_encryption = false
 use_compression = false
+
+[p2p_tcp]
+type = xtcp
+sk = abcdefg
+local_ip = 127.0.0.1
+local_port = 22
+use_encryption = false
+use_compression = false
+
+[p2p_tcp_vistor]
+role = vistor
+type = xtcp
+server_name = p2p_tcp
+sk = abcdefg
+bind_addr = 127.0.0.1
+bind_port = 9001
+use_encryption = false
+use_compression = false

+ 3 - 0
conf/frps_full.ini

@@ -5,6 +5,9 @@
 bind_addr = 0.0.0.0
 bind_port = 7000
 
+# udp port used for nat hole punching (required by xtcp)
+bind_udp_port = 7001
+
 # udp port used for kcp protocol, it can be the same as 'bind_port'
 # if not set, kcp is disabled in frps
 kcp_bind_port = 7000

+ 2 - 0
models/config/client_common.go

@@ -30,6 +30,7 @@ type ClientCommonConf struct {
 	ConfigFile        string
 	ServerAddr        string
 	ServerPort        int64
+	ServerUdpPort     int64 // set from the login response message sent by frps
 	HttpProxy         string
 	LogFile           string
 	LogWay            string
@@ -55,6 +56,7 @@ func GetDeaultClientCommonConf() *ClientCommonConf {
 		ConfigFile:        "./frpc.ini",
 		ServerAddr:        "0.0.0.0",
 		ServerPort:        7000,
+		ServerUdpPort:     0,
 		HttpProxy:         "",
 		LogFile:           "console",
 		LogWay:            "console",

+ 90 - 0
models/config/proxy.go

@@ -36,6 +36,7 @@ func init() {
 	proxyConfTypeMap[consts.HttpProxy] = reflect.TypeOf(HttpProxyConf{})
 	proxyConfTypeMap[consts.HttpsProxy] = reflect.TypeOf(HttpsProxyConf{})
 	proxyConfTypeMap[consts.StcpProxy] = reflect.TypeOf(StcpProxyConf{})
+	proxyConfTypeMap[consts.XtcpProxy] = reflect.TypeOf(XtcpProxyConf{})
 }
 
 // NewConfByType creates an empty ProxyConf object by proxyType.
@@ -672,6 +673,95 @@ func (cfg *StcpProxyConf) Check() (err error) {
 	return
 }
 
+// XTCP
+type XtcpProxyConf struct {
+	BaseProxyConf
+
+	Role string `json:"role"`
+	Sk   string `json:"sk"`
+
+	// used in role server
+	LocalSvrConf
+	PluginConf
+
+	// used in role vistor
+	ServerName string `json:"server_name"`
+	BindAddr   string `json:"bind_addr"`
+	BindPort   int    `json:"bind_port"`
+}
+
+func (cfg *XtcpProxyConf) Compare(cmp ProxyConf) bool {
+	cmpConf, ok := cmp.(*XtcpProxyConf)
+	if !ok {
+		return false
+	}
+
+	if !cfg.BaseProxyConf.compare(&cmpConf.BaseProxyConf) ||
+		!cfg.LocalSvrConf.compare(&cmpConf.LocalSvrConf) ||
+		!cfg.PluginConf.compare(&cmpConf.PluginConf) ||
+		cfg.Role != cmpConf.Role ||
+		cfg.Sk != cmpConf.Sk ||
+		cfg.ServerName != cmpConf.ServerName ||
+		cfg.BindAddr != cmpConf.BindAddr ||
+		cfg.BindPort != cmpConf.BindPort {
+		return false
+	}
+	return true
+}
+
+// Only for role server.
+func (cfg *XtcpProxyConf) LoadFromMsg(pMsg *msg.NewProxy) {
+	cfg.BaseProxyConf.LoadFromMsg(pMsg)
+	cfg.Sk = pMsg.Sk
+}
+
+func (cfg *XtcpProxyConf) LoadFromFile(name string, section ini.Section) (err error) {
+	if err = cfg.BaseProxyConf.LoadFromFile(name, section); err != nil {
+		return
+	}
+
+	tmpStr := section["role"]
+	if tmpStr == "server" || tmpStr == "vistor" {
+		cfg.Role = tmpStr
+	} else {
+		cfg.Role = "server"
+	}
+
+	cfg.Sk = section["sk"]
+
+	if tmpStr == "vistor" {
+		prefix := section["prefix"]
+		cfg.ServerName = prefix + section["server_name"]
+		if cfg.BindAddr = section["bind_addr"]; cfg.BindAddr == "" {
+			cfg.BindAddr = "127.0.0.1"
+		}
+
+		if tmpStr, ok := section["bind_port"]; ok {
+			if cfg.BindPort, err = strconv.Atoi(tmpStr); err != nil {
+				return fmt.Errorf("Parse conf error: proxy [%s] bind_port error", name)
+			}
+		} else {
+			return fmt.Errorf("Parse conf error: proxy [%s] bind_port not found", name)
+		}
+	} else {
+		if err = cfg.PluginConf.LoadFromFile(name, section); err != nil {
+			if err = cfg.LocalSvrConf.LoadFromFile(name, section); err != nil {
+				return
+			}
+		}
+	}
+	return
+}
+
+func (cfg *XtcpProxyConf) UnMarshalToMsg(pMsg *msg.NewProxy) {
+	cfg.BaseProxyConf.UnMarshalToMsg(pMsg)
+	pMsg.Sk = cfg.Sk
+}
+
+func (cfg *XtcpProxyConf) Check() (err error) {
+	return
+}
+
 // if len(startProxy) is 0, start all
 // otherwise just start proxies in startProxy map
 func LoadProxyConfFromFile(prefix string, conf ini.File, startProxy map[string]struct{}) (

+ 10 - 0
models/config/server_common.go

@@ -30,6 +30,7 @@ type ServerCommonConf struct {
 	ConfigFile    string
 	BindAddr      string
 	BindPort      int64
+	BindUdpPort   int64
 	KcpBindPort   int64
 	ProxyBindAddr string
 
@@ -66,6 +67,7 @@ func GetDefaultServerCommonConf() *ServerCommonConf {
 		ConfigFile:       "./frps.ini",
 		BindAddr:         "0.0.0.0",
 		BindPort:         7000,
+		BindUdpPort:      0,
 		KcpBindPort:      0,
 		ProxyBindAddr:    "0.0.0.0",
 		VhostHttpPort:    0,
@@ -111,6 +113,14 @@ func LoadServerCommonConf(conf ini.File) (cfg *ServerCommonConf, err error) {
 		}
 	}
 
+	tmpStr, ok = conf.Get("common", "bind_udp_port")
+	if ok {
+		v, err = strconv.ParseInt(tmpStr, 10, 64)
+		if err == nil {
+			cfg.BindUdpPort = v
+		}
+	}
+
 	tmpStr, ok = conf.Get("common", "kcp_bind_port")
 	if ok {
 		v, err = strconv.ParseInt(tmpStr, 10, 64)

+ 1 - 0
models/consts/consts.go

@@ -28,4 +28,5 @@ var (
 	HttpProxy  string = "http"
 	HttpsProxy string = "https"
 	StcpProxy  string = "stcp"
+	XtcpProxy  string = "xtcp"
 )

+ 33 - 3
models/msg/msg.go

@@ -33,6 +33,10 @@ const (
 	TypePing              = 'h'
 	TypePong              = '4'
 	TypeUdpPacket         = 'u'
+	TypeNatHoleVistor     = 'i'
+	TypeNatHoleClient     = 'n'
+	TypeNatHoleResp       = 'm'
+	TypeNatHoleSid        = '5'
 )
 
 var (
@@ -57,6 +61,10 @@ func init() {
 	TypeMap[TypePing] = reflect.TypeOf(Ping{})
 	TypeMap[TypePong] = reflect.TypeOf(Pong{})
 	TypeMap[TypeUdpPacket] = reflect.TypeOf(UdpPacket{})
+	TypeMap[TypeNatHoleVistor] = reflect.TypeOf(NatHoleVistor{})
+	TypeMap[TypeNatHoleClient] = reflect.TypeOf(NatHoleClient{})
+	TypeMap[TypeNatHoleResp] = reflect.TypeOf(NatHoleResp{})
+	TypeMap[TypeNatHoleSid] = reflect.TypeOf(NatHoleSid{})
 
 	for k, v := range TypeMap {
 		TypeStringMap[v] = k
@@ -82,9 +90,10 @@ type Login struct {
 }
 
 type LoginResp struct {
-	Version string `json:"version"`
-	RunId   string `json:"run_id"`
-	Error   string `json:"error"`
+	Version       string `json:"version"`
+	RunId         string `json:"run_id"`
+	ServerUdpPort int64  `json:"server_udp_port"`
+	Error         string `json:"error"`
 }
 
 // When frpc login success, send this message to frps for running a new proxy.
@@ -153,3 +162,24 @@ type UdpPacket struct {
 	LocalAddr  *net.UDPAddr `json:"l"`
 	RemoteAddr *net.UDPAddr `json:"r"`
 }
+
+type NatHoleVistor struct {
+	ProxyName string `json:"proxy_name"`
+	SignKey   string `json:"sign_key"`
+	Timestamp int64  `json:"timestamp"`
+}
+
+type NatHoleClient struct {
+	ProxyName string `json:"proxy_name"`
+	Sid       string `json:"sid"`
+}
+
+type NatHoleResp struct {
+	Sid        string `json:"sid"`
+	VistorAddr string `json:"vistor_addr"`
+	ClientAddr string `json:"client_addr"`
+}
+
+type NatHoleSid struct {
+	Sid string `json:"sid"`
+}
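
Taken together, these four message types carry the whole rendezvous handshake. A minimal sketch of the visitor side, mirroring client/vistor.go above (the `rendezvous` helper name and the flat error handling are illustrative, not part of this PR):

```go
package xtcpdemo

import (
	"bytes"
	"net"
	"time"

	"github.com/fatedier/frp/models/msg"
	"github.com/fatedier/frp/utils/util"
)

// rendezvous performs the visitor half of the handshake against frps's
// nat hole controller and returns the NatHoleResp carrying both peers'
// public addresses. Logging and buffer pooling are elided.
func rendezvous(serverUdpAddr, proxyName, sk string) (*msg.NatHoleResp, error) {
	raddr, err := net.ResolveUDPAddr("udp", serverUdpAddr)
	if err != nil {
		return nil, err
	}
	conn, err := net.DialUDP("udp", nil, raddr)
	if err != nil {
		return nil, err
	}
	defer conn.Close()

	// Step 1: announce ourselves with a signed NatHoleVistor message.
	now := time.Now().Unix()
	if err = msg.WriteMsg(conn, &msg.NatHoleVistor{
		ProxyName: proxyName,
		SignKey:   util.GetAuthKey(sk, now),
		Timestamp: now,
	}); err != nil {
		return nil, err
	}

	// Step 2: frps pushes a NatHoleSid to frpc over a work connection,
	// frpc answers with NatHoleClient, and frps replies to both sides
	// with a NatHoleResp containing the addresses it observed.
	conn.SetReadDeadline(time.Now().Add(10 * time.Second))
	buf := make([]byte, 1024)
	n, err := conn.Read(buf)
	if err != nil {
		return nil, err
	}
	var resp msg.NatHoleResp
	if err = msg.ReadMsgInto(bytes.NewReader(buf[:n]), &resp); err != nil {
		return nil, err
	}
	return &resp, nil
}
```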

+ 4 - 3
server/control.go

@@ -97,9 +97,10 @@ func NewControl(svr *Service, ctlConn net.Conn, loginMsg *msg.Login) *Control {
 // Start sends a login success message to the client and starts working.
 func (ctl *Control) Start() {
 	loginRespMsg := &msg.LoginResp{
-		Version: version.Full(),
-		RunId:   ctl.runId,
-		Error:   "",
+		Version:       version.Full(),
+		RunId:         ctl.runId,
+		ServerUdpPort: config.ServerCommonCfg.BindUdpPort,
+		Error:         "",
 	}
 	msg.WriteMsg(ctl.conn, loginRespMsg)
 

+ 182 - 0
server/nathole.go

@@ -0,0 +1,182 @@
+package server
+
+import (
+	"bytes"
+	"fmt"
+	"net"
+	"sync"
+	"time"
+
+	"github.com/fatedier/frp/models/msg"
+	"github.com/fatedier/frp/utils/errors"
+	"github.com/fatedier/frp/utils/log"
+	"github.com/fatedier/frp/utils/pool"
+	"github.com/fatedier/frp/utils/util"
+)
+
+// NatHoleTimeout is the timeout for a nat hole session, in seconds.
+var NatHoleTimeout int64 = 10
+
+type NatHoleController struct {
+	listener *net.UDPConn
+
+	clientCfgs map[string]*NatHoleClientCfg
+	sessions   map[string]*NatHoleSession
+
+	mu sync.RWMutex
+}
+
+func NewNatHoleController(udpBindAddr string) (nc *NatHoleController, err error) {
+	addr, err := net.ResolveUDPAddr("udp", udpBindAddr)
+	if err != nil {
+		return nil, err
+	}
+	lconn, err := net.ListenUDP("udp", addr)
+	if err != nil {
+		return nil, err
+	}
+	nc = &NatHoleController{
+		listener:   lconn,
+		clientCfgs: make(map[string]*NatHoleClientCfg),
+		sessions:   make(map[string]*NatHoleSession),
+	}
+	return nc, nil
+}
+
+func (nc *NatHoleController) ListenClient(name string, sk string) (sidCh chan string) {
+	clientCfg := &NatHoleClientCfg{
+		Name:  name,
+		Sk:    sk,
+		SidCh: make(chan string),
+	}
+	nc.mu.Lock()
+	nc.clientCfgs[name] = clientCfg
+	nc.mu.Unlock()
+	return clientCfg.SidCh
+}
+
+func (nc *NatHoleController) CloseClient(name string) {
+	nc.mu.Lock()
+	defer nc.mu.Unlock()
+	delete(nc.clientCfgs, name)
+}
+
+func (nc *NatHoleController) Run() {
+	for {
+		buf := pool.GetBuf(1024)
+		n, raddr, err := nc.listener.ReadFromUDP(buf)
+		if err != nil {
+			log.Trace("nat hole listener read from udp error: %v", err)
+			return
+		}
+
+		rd := bytes.NewReader(buf[:n])
+		rawMsg, err := msg.ReadMsg(rd)
+		if err != nil {
+			log.Trace("read nat hole message error: %v", err)
+			continue
+		}
+
+		switch m := rawMsg.(type) {
+		case *msg.NatHoleVistor:
+			go nc.HandleVistor(m, raddr)
+		case *msg.NatHoleClient:
+			go nc.HandleClient(m, raddr)
+		default:
+			log.Trace("error nat hole message type")
+			continue
+		}
+		pool.PutBuf(buf)
+	}
+}
+
+func (nc *NatHoleController) GenSid() string {
+	t := time.Now().Unix()
+	id, _ := util.RandId()
+	return fmt.Sprintf("%d%s", t, id)
+}
+
+func (nc *NatHoleController) HandleVistor(m *msg.NatHoleVistor, raddr *net.UDPAddr) {
+	sid := nc.GenSid()
+	session := &NatHoleSession{
+		Sid:        sid,
+		VistorAddr: raddr,
+		NotifyCh:   make(chan struct{}, 0),
+	}
+	nc.mu.Lock()
+	clientCfg, ok := nc.clientCfgs[m.ProxyName]
+	if !ok || m.SignKey != util.GetAuthKey(clientCfg.Sk, m.Timestamp) {
+		nc.mu.Unlock()
+		return
+	}
+	nc.sessions[sid] = session
+	nc.mu.Unlock()
+	log.Trace("handle vistor message, sid [%s]", sid)
+
+	defer func() {
+		nc.mu.Lock()
+		delete(nc.sessions, sid)
+		nc.mu.Unlock()
+	}()
+
+	err := errors.PanicToError(func() {
+		clientCfg.SidCh <- sid
+	})
+	if err != nil {
+		return
+	}
+
+	// Wait for the client's connection.
+	select {
+	case <-session.NotifyCh:
+		resp := nc.GenNatHoleResponse(raddr, session)
+		log.Trace("send nat hole response to vistor")
+		nc.listener.WriteToUDP(resp, raddr)
+	case <-time.After(time.Duration(NatHoleTimeout) * time.Second):
+		return
+	}
+}
+
+func (nc *NatHoleController) HandleClient(m *msg.NatHoleClient, raddr *net.UDPAddr) {
+	nc.mu.RLock()
+	session, ok := nc.sessions[m.Sid]
+	nc.mu.RUnlock()
+	if !ok {
+		return
+	}
+	log.Trace("handle client message, sid [%s]", session.Sid)
+	session.ClientAddr = raddr
+	session.NotifyCh <- struct{}{}
+
+	resp := nc.GenNatHoleResponse(raddr, session)
+	log.Trace("send nat hole response to client")
+	nc.listener.WriteToUDP(resp, raddr)
+}
+
+func (nc *NatHoleController) GenNatHoleResponse(raddr *net.UDPAddr, session *NatHoleSession) []byte {
+	m := &msg.NatHoleResp{
+		Sid:        session.Sid,
+		VistorAddr: session.VistorAddr.String(),
+		ClientAddr: session.ClientAddr.String(),
+	}
+	b := bytes.NewBuffer(nil)
+	err := msg.WriteMsg(b, m)
+	if err != nil {
+		return []byte("")
+	}
+	return b.Bytes()
+}
+
+type NatHoleSession struct {
+	Sid        string
+	VistorAddr *net.UDPAddr
+	ClientAddr *net.UDPAddr
+
+	NotifyCh chan struct{}
+}
+
+type NatHoleClientCfg struct {
+	Name  string
+	Sk    string
+	SidCh chan string
+}
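
How the pieces connect, as a hedged sketch (the `wireNatHole` helper is illustrative and not part of this PR; the real wiring is in the server/service.go and server/proxy.go hunks below):

```go
package server

import "github.com/fatedier/frp/utils/log"

// wireNatHole shows the intended lifecycle: frps creates one controller
// on bind_udp_port and runs it, and each registered xtcp proxy subscribes
// for session ids, which it forwards to frpc as msg.NatHoleSid over a
// work connection.
func wireNatHole(proxyName, sk string) error {
	nc, err := NewNatHoleController("0.0.0.0:7001") // frps bind_udp_port
	if err != nil {
		return err
	}
	go nc.Run()

	sidCh := nc.ListenClient(proxyName, sk)
	go func() {
		defer nc.CloseClient(proxyName)
		for sid := range sidCh {
			// In this PR the sid is written to the proxy's work
			// connection; here we only log it.
			log.Trace("new nat hole session: %s", sid)
		}
	}()
	return nil
}
```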

+ 53 - 0
server/proxy.go

@@ -148,6 +148,11 @@ func NewProxy(ctl *Control, pxyConf config.ProxyConf) (pxy Proxy, err error) {
 			BaseProxy: basePxy,
 			cfg:       cfg,
 		}
+	case *config.XtcpProxyConf:
+		pxy = &XtcpProxy{
+			BaseProxy: basePxy,
+			cfg:       cfg,
+		}
 	default:
 		return pxy, fmt.Errorf("proxy type not support")
 	}
@@ -306,6 +311,54 @@ func (pxy *StcpProxy) Close() {
 	pxy.ctl.svr.vistorManager.CloseListener(pxy.GetName())
 }
 
+type XtcpProxy struct {
+	BaseProxy
+	cfg *config.XtcpProxyConf
+
+	closeCh chan struct{}
+}
+
+func (pxy *XtcpProxy) Run() error {
+	if pxy.ctl.svr.natHoleController == nil {
+		pxy.Error("udp port for xtcp is not specified.")
+		return fmt.Errorf("xtcp is not supported in frps")
+	}
+	sidCh := pxy.ctl.svr.natHoleController.ListenClient(pxy.GetName(), pxy.cfg.Sk)
+	go func() {
+		for {
+			select {
+			case <-pxy.closeCh:
+				return
+			case sid := <-sidCh:
+				workConn, err := pxy.GetWorkConnFromPool()
+				if err != nil {
+					continue
+				}
+				m := &msg.NatHoleSid{
+					Sid: sid,
+				}
+				err = msg.WriteMsg(workConn, m)
+				if err != nil {
+					pxy.Warn("write nat hole sid package error, %v", err)
+				}
+			}
+		}
+	}()
+	return nil
+}
+
+func (pxy *XtcpProxy) GetConf() config.ProxyConf {
+	return pxy.cfg
+}
+
+func (pxy *XtcpProxy) Close() {
+	pxy.BaseProxy.Close()
+	pxy.ctl.svr.natHoleController.CloseClient(pxy.GetName())
+	errors.PanicToError(func() {
+		close(pxy.closeCh)
+	})
+}
+
 type UdpProxy struct {
 	BaseProxy
 	cfg *config.UdpProxyConf

+ 36 - 16
server/service.go

@@ -58,6 +58,9 @@ type Service struct {
 
 	// Manage all vistor listeners.
 	vistorManager *VistorManager
+
+	// Controller for nat hole connections.
+	natHoleController *NatHoleController
 }
 
 func NewService() (svr *Service, err error) {
@@ -66,36 +69,37 @@ func NewService() (svr *Service, err error) {
 		pxyManager:    NewProxyManager(),
 		vistorManager: NewVistorManager(),
 	}
+	cfg := config.ServerCommonCfg
 
 	// Init assets.
-	err = assets.Load(config.ServerCommonCfg.AssetsDir)
+	err = assets.Load(cfg.AssetsDir)
 	if err != nil {
 		err = fmt.Errorf("Load assets error: %v", err)
 		return
 	}
 
 	// Listen for accepting connections from client.
-	svr.listener, err = frpNet.ListenTcp(config.ServerCommonCfg.BindAddr, config.ServerCommonCfg.BindPort)
+	svr.listener, err = frpNet.ListenTcp(cfg.BindAddr, cfg.BindPort)
 	if err != nil {
 		err = fmt.Errorf("Create server listener error, %v", err)
 		return
 	}
-	log.Info("frps tcp listen on %s:%d", config.ServerCommonCfg.BindAddr, config.ServerCommonCfg.BindPort)
+	log.Info("frps tcp listen on %s:%d", cfg.BindAddr, cfg.BindPort)
 
 	// Listen for accepting connections from client using kcp protocol.
-	if config.ServerCommonCfg.KcpBindPort > 0 {
-		svr.kcpListener, err = frpNet.ListenKcp(config.ServerCommonCfg.BindAddr, config.ServerCommonCfg.KcpBindPort)
+	if cfg.KcpBindPort > 0 {
+		svr.kcpListener, err = frpNet.ListenKcp(cfg.BindAddr, cfg.KcpBindPort)
 		if err != nil {
-			err = fmt.Errorf("Listen on kcp address udp [%s:%d] error: %v", config.ServerCommonCfg.BindAddr, config.ServerCommonCfg.KcpBindPort, err)
+			err = fmt.Errorf("Listen on kcp address udp [%s:%d] error: %v", cfg.BindAddr, cfg.KcpBindPort, err)
 			return
 		}
-		log.Info("frps kcp listen on udp %s:%d", config.ServerCommonCfg.BindAddr, config.ServerCommonCfg.BindPort)
+		log.Info("frps kcp listen on udp %s:%d", cfg.BindAddr, cfg.BindPort)
 	}
 
 	// Create http vhost muxer.
-	if config.ServerCommonCfg.VhostHttpPort > 0 {
+	if cfg.VhostHttpPort > 0 {
 		var l frpNet.Listener
-		l, err = frpNet.ListenTcp(config.ServerCommonCfg.ProxyBindAddr, config.ServerCommonCfg.VhostHttpPort)
+		l, err = frpNet.ListenTcp(cfg.ProxyBindAddr, cfg.VhostHttpPort)
 		if err != nil {
 			err = fmt.Errorf("Create vhost http listener error, %v", err)
 			return
@@ -105,13 +109,13 @@ func NewService() (svr *Service, err error) {
 			err = fmt.Errorf("Create vhost httpMuxer error, %v", err)
 			return
 		}
-		log.Info("http service listen on %s:%d", config.ServerCommonCfg.ProxyBindAddr, config.ServerCommonCfg.VhostHttpPort)
+		log.Info("http service listen on %s:%d", cfg.ProxyBindAddr, cfg.VhostHttpPort)
 	}
 
 	// Create https vhost muxer.
-	if config.ServerCommonCfg.VhostHttpsPort > 0 {
+	if cfg.VhostHttpsPort > 0 {
 		var l frpNet.Listener
-		l, err = frpNet.ListenTcp(config.ServerCommonCfg.ProxyBindAddr, config.ServerCommonCfg.VhostHttpsPort)
+		l, err = frpNet.ListenTcp(cfg.ProxyBindAddr, cfg.VhostHttpsPort)
 		if err != nil {
 			err = fmt.Errorf("Create vhost https listener error, %v", err)
 			return
@@ -121,22 +125,38 @@ func NewService() (svr *Service, err error) {
 			err = fmt.Errorf("Create vhost httpsMuxer error, %v", err)
 			return
 		}
-		log.Info("https service listen on %s:%d", config.ServerCommonCfg.ProxyBindAddr, config.ServerCommonCfg.VhostHttpsPort)
+		log.Info("https service listen on %s:%d", cfg.ProxyBindAddr, cfg.VhostHttpsPort)
+	}
+
+	// Create nat hole controller.
+	if cfg.BindUdpPort > 0 {
+		var nc *NatHoleController
+		addr := fmt.Sprintf("%s:%d", cfg.BindAddr, cfg.BindUdpPort)
+		nc, err = NewNatHoleController(addr)
+		if err != nil {
+			err = fmt.Errorf("Create nat hole controller error, %v", err)
+			return
+		}
+		svr.natHoleController = nc
+		log.Info("nat hole udp service listen on %s:%d", cfg.BindAddr, cfg.BindUdpPort)
 	}
 
 	// Create dashboard web server.
-	if config.ServerCommonCfg.DashboardPort > 0 {
-		err = RunDashboardServer(config.ServerCommonCfg.BindAddr, config.ServerCommonCfg.DashboardPort)
+	if cfg.DashboardPort > 0 {
+		err = RunDashboardServer(cfg.BindAddr, cfg.DashboardPort)
 		if err != nil {
 			err = fmt.Errorf("Create dashboard web server error, %v", err)
 			return
 		}
-		log.Info("Dashboard listen on %s:%d", config.ServerCommonCfg.BindAddr, config.ServerCommonCfg.DashboardPort)
+		log.Info("Dashboard listen on %s:%d", cfg.BindAddr, cfg.DashboardPort)
 	}
 	return
 }
 
 func (svr *Service) Run() {
+	if svr.natHoleController != nil {
+		go svr.natHoleController.Run()
+	}
 	if config.ServerCommonCfg.KcpBindPort > 0 {
 		go svr.HandleListener(svr.kcpListener)
 	}

+ 5 - 1
utils/net/kcp.go

@@ -20,7 +20,7 @@ import (
 
 	"github.com/fatedier/frp/utils/log"
 
-	kcp "github.com/xtaci/kcp-go"
+	kcp "github.com/fatedier/kcp-go"
 )
 
 type KcpListener struct {
@@ -85,3 +85,7 @@ func (l *KcpListener) Close() error {
 	}
 	return nil
 }
+
+func NewKcpConnFromUdp(conn *net.UDPConn, connected bool, raddr string) (net.Conn, error) {
+	return kcp.NewConnEx(1, connected, raddr, nil, 10, 3, conn)
+}
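
The `connected` flag is the subtle parameter here: in this PR the frpc proxy side, which dialed its peer, passes `true`, while the visitor, which listens on its own UDP address, passes `false`. A small sketch (the `upgradeToKcp` wrapper is illustrative):

```go
package xtcpdemo

import (
	"net"

	frpNet "github.com/fatedier/frp/utils/net"
)

// upgradeToKcp wraps a punched UDP socket in a KCP session (conv id 1 and
// FEC 10+3, matching the constants used above). The peer that dialed its
// counterpart passes connected=true; the peer that listened on its own
// local address passes connected=false.
func upgradeToKcp(udpConn *net.UDPConn, connected bool, peerAddr string) (net.Conn, error) {
	return frpNet.NewKcpConnFromUdp(udpConn, connected, peerAddr)
}
```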

+ 2 - 1
vendor/github.com/fatedier/beego/logs/console.go

@@ -42,6 +42,7 @@ var colors = []brush{
 	newBrush("1;32"), // Notice             green
 	newBrush("1;34"), // Informational      blue
 	newBrush("1;34"), // Debug              blue
+	newBrush("1;34"), // Trace              blue
 }
 
 // consoleWriter implements LoggerInterface and writes messages to terminal.
@@ -55,7 +56,7 @@ type consoleWriter struct {
 func NewConsole() Logger {
 	cw := &consoleWriter{
 		lg:       newLogWriter(os.Stdout),
-		Level:    LevelDebug,
+		Level:    LevelTrace,
 		Colorful: runtime.GOOS != "windows",
 	}
 	return cw

+ 24 - 0
vendor/github.com/fatedier/kcp-go/.gitignore

@@ -0,0 +1,24 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof

+ 15 - 0
vendor/github.com/fatedier/kcp-go/.travis.yml

@@ -0,0 +1,15 @@
+language: go
+go:
+    - 1.9
+
+before_install:
+    - go get -t -v ./...
+
+install:
+    - go get github.com/xtaci/kcp-go
+
+script:
+    - go test -coverprofile=coverage.txt -covermode=atomic -bench .
+
+after_success:
+    - bash <(curl -s https://codecov.io/bash)

+ 22 - 0
vendor/github.com/fatedier/kcp-go/LICENSE

@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Daniel Fu
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+

+ 172 - 0
vendor/github.com/fatedier/kcp-go/README.md

@@ -0,0 +1,172 @@
+<img src="kcp-go.png" alt="kcp-go" height="50px" />
+
+
+[![GoDoc][1]][2] [![Powered][9]][10] [![MIT licensed][11]][12] [![Build Status][3]][4] [![Go Report Card][5]][6] [![Coverage Status][7]][8]
+
+[1]: https://godoc.org/github.com/xtaci/kcp-go?status.svg
+[2]: https://godoc.org/github.com/xtaci/kcp-go
+[3]: https://travis-ci.org/xtaci/kcp-go.svg?branch=master
+[4]: https://travis-ci.org/xtaci/kcp-go
+[5]: https://goreportcard.com/badge/github.com/xtaci/kcp-go
+[6]: https://goreportcard.com/report/github.com/xtaci/kcp-go
+[7]: https://codecov.io/gh/xtaci/kcp-go/branch/master/graph/badge.svg
+[8]: https://codecov.io/gh/xtaci/kcp-go
+[9]: https://img.shields.io/badge/KCP-Powered-blue.svg
+[10]: https://github.com/skywind3000/kcp
+[11]: https://img.shields.io/badge/license-MIT-blue.svg
+[12]: LICENSE
+
+## Introduction
+
+**kcp-go** is a **Production-Grade Reliable-UDP** library for [golang](https://golang.org/). 
+
+It provides **fast, ordered and error-checked** delivery of streams over **UDP** packets, and has been well tested with the open-source project [kcptun](https://github.com/xtaci/kcptun). Millions of devices (from low-end MIPS routers to high-end servers) are running **kcp-go** at present, in applications such as **online games, live broadcasting, file synchronization and network acceleration**.
+
+[Latest Release](https://github.com/xtaci/kcp-go/releases)
+
+## Features
+
+1. Optimized for **Realtime Online Games, Audio/Video Streaming and Latency-Sensitive Distributed Consensus**.
+1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with language specific optimizations.
+1. **Cache friendly** and **memory optimized** design, offering an extremely **high-performance** core.
+1. Handles **>5K concurrent connections** on a single commodity server.
+1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener), a drop-in replacement for [net.TCPConn](https://golang.org/pkg/net/#TCPConn).
+1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction)
+1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode.
+1. **Fixed number of goroutines** created for the entire server application, minimizing goroutine context switches.
+
+## Conventions
+
+Control messages like TCP's **SYN/FIN/RST** **are not defined** in KCP, so you need a **keepalive/heartbeat mechanism** at the application level. A real-world example is to run a **multiplexing** protocol over the session, such as [smux](https://github.com/xtaci/smux) (with an embedded keepalive mechanism); see [kcptun](https://github.com/xtaci/kcptun) for an example.
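
A minimal application-level heartbeat might look like the sketch below (the one-byte ping and the configurable interval are illustrative choices, not part of kcp-go; a kcp-go session satisfies `net.Conn`, so any framing works):

```go
package keepalive

import (
	"net"
	"time"
)

// keepalive writes a one-byte ping every interval and closes the session
// when a write fails. A real implementation would also expect replies
// from the peer within a deadline.
func keepalive(conn net.Conn, interval time.Duration, stop <-chan struct{}) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			if _, err := conn.Write([]byte{0}); err != nil {
				conn.Close()
				return
			}
		case <-stop:
			return
		}
	}
}
```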
+
+## Documentation
+
+For complete documentation, see the associated [Godoc](https://godoc.org/github.com/xtaci/kcp-go).
+
+## Specification
+
+<img src="frame.png" alt="Frame Format" height="109px" />
+
+```
++-----------------+
+| SESSION         |
++-----------------+
+| KCP(ARQ)        |
++-----------------+
+| FEC(OPTIONAL)   |
++-----------------+
+| CRYPTO(OPTIONAL)|
++-----------------+
+| UDP(PACKET)     |
++-----------------+
+| IP              |
++-----------------+
+| LINK            |
++-----------------+
+| PHY             |
++-----------------+
+(LAYER MODEL OF KCP-GO)
+```
+
+
+## Usage
+
+Client:   [full demo](https://github.com/xtaci/kcptun/blob/master/client/main.go)
+```go
+kcpconn, err := kcp.DialWithOptions("192.168.0.1:10000", nil, 10, 3)
+```
+Server:   [full demo](https://github.com/xtaci/kcptun/blob/master/server/main.go)
+```go
+lis, err := kcp.ListenWithOptions(":10000", nil, 10, 3)
+```
+
+## Performance
+```
+  Model Name:	MacBook Pro
+  Model Identifier:	MacBookPro12,1
+  Processor Name:	Intel Core i5
+  Processor Speed:	2.7 GHz
+  Number of Processors:	1
+  Total Number of Cores:	2
+  L2 Cache (per Core):	256 KB
+  L3 Cache:	3 MB
+  Memory:	8 GB
+```
+```
+$ go test -v -run=^$ -bench .
+beginning tests, encryption:salsa20, fec:10/3
+BenchmarkAES128-4          	  200000	      8256 ns/op	 363.33 MB/s	       0 B/op	       0 allocs/op
+BenchmarkAES192-4          	  200000	      9153 ns/op	 327.74 MB/s	       0 B/op	       0 allocs/op
+BenchmarkAES256-4          	  200000	     10079 ns/op	 297.64 MB/s	       0 B/op	       0 allocs/op
+BenchmarkTEA-4             	  100000	     18643 ns/op	 160.91 MB/s	       0 B/op	       0 allocs/op
+BenchmarkXOR-4             	 5000000	       316 ns/op	9486.46 MB/s	       0 B/op	       0 allocs/op
+BenchmarkBlowfish-4        	   50000	     35643 ns/op	  84.17 MB/s	       0 B/op	       0 allocs/op
+BenchmarkNone-4            	30000000	        56.2 ns/op	53371.83 MB/s	       0 B/op	       0 allocs/op
+BenchmarkCast5-4           	   30000	     44744 ns/op	  67.05 MB/s	       0 B/op	       0 allocs/op
+Benchmark3DES-4            	    2000	    639839 ns/op	   4.69 MB/s	       2 B/op	       0 allocs/op
+BenchmarkTwofish-4         	   30000	     43368 ns/op	  69.17 MB/s	       0 B/op	       0 allocs/op
+BenchmarkXTEA-4            	   30000	     57673 ns/op	  52.02 MB/s	       0 B/op	       0 allocs/op
+BenchmarkSalsa20-4         	  300000	      3917 ns/op	 765.80 MB/s	       0 B/op	       0 allocs/op
+BenchmarkFlush-4           	10000000	       226 ns/op	       0 B/op	       0 allocs/op
+BenchmarkEchoSpeed4K-4     	    5000	    300030 ns/op	  13.65 MB/s	    5672 B/op	     177 allocs/op
+BenchmarkEchoSpeed64K-4    	     500	   3202335 ns/op	  20.47 MB/s	   73295 B/op	    2198 allocs/op
+BenchmarkEchoSpeed512K-4   	      50	  24926924 ns/op	  21.03 MB/s	  659339 B/op	   17602 allocs/op
+BenchmarkEchoSpeed1M-4     	      20	  64857821 ns/op	  16.17 MB/s	 1772437 B/op	   42869 allocs/op
+BenchmarkSinkSpeed4K-4     	   30000	     50230 ns/op	  81.54 MB/s	    2058 B/op	      48 allocs/op
+BenchmarkSinkSpeed64K-4    	    2000	    648718 ns/op	 101.02 MB/s	   31165 B/op	     687 allocs/op
+BenchmarkSinkSpeed256K-4   	     300	   4635905 ns/op	 113.09 MB/s	  286229 B/op	    5516 allocs/op
+BenchmarkSinkSpeed1M-4     	     200	   9566933 ns/op	 109.60 MB/s	  463771 B/op	   10701 allocs/op
+PASS
+ok  	_/Users/xtaci/.godeps/src/github.com/xtaci/kcp-go	39.689s
+```
+
+## Design Considerations
+
+1. slice vs. container/list
+
+`kcp.flush()` loops through the send queue for retransmission checking every 20ms (the `interval`).
+
+I've written a benchmark comparing a sequential loop through a *slice* and through *container/list* here:
+
+https://github.com/xtaci/notes/blob/master/golang/benchmark2/cachemiss_test.go
+
+```
+BenchmarkLoopSlice-4   	2000000000	         0.39 ns/op
+BenchmarkLoopList-4    	100000000	        54.6 ns/op
+```
+
+The list structure introduces **heavy cache misses** compared to a slice, which has better **locality**: with 5000 connections, a window size of 32 and a 20ms interval, each `kcp.flush()` costs 6µs/0.03% (CPU) using a slice versus 8.7ms/43.5% (CPU) using a list.
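
A condensed re-creation of that comparison (a sketch of the loop shapes only; the numbers above come from the linked benchmark file, which is authoritative):

```go
package bench

import (
	"container/list"
	"testing"
)

// One element is visited per iteration, so ns/op approximates per-element cost.
var s = make([]int, 1<<20)

var l = func() *list.List {
	l := list.New()
	for i := 0; i < 1<<20; i++ {
		l.PushBack(i)
	}
	return l
}()

func BenchmarkLoopSlice(b *testing.B) {
	sum, j := 0, 0
	for i := 0; i < b.N; i++ {
		sum += s[j]
		if j++; j == len(s) {
			j = 0
		}
	}
	_ = sum
}

func BenchmarkLoopList(b *testing.B) {
	sum := 0
	e := l.Front()
	for i := 0; i < b.N; i++ {
		sum += e.Value.(int)
		if e = e.Next(); e == nil {
			e = l.Front()
		}
	}
	_ = sum
}
```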
+
+2. Timing accuracy vs. syscall clock_gettime
+
+Timing is **critical** to the **RTT estimator**: inaccurate timing introduces false retransmissions in KCP. But calling `time.Now()` costs 42 cycles (10.5ns on a 4GHz CPU, 15.6ns on my 2.7GHz MacBook Pro); the benchmark for `time.Now()`:
+
+https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
+
+```
+BenchmarkNow-4         	100000000	        15.6 ns/op
+```
+
+In kcp-go, the current time is refreshed upon return from each `kcp.output()` call, and each `kcp.flush()` fetches the current time once. Most of the time, 5000 connections cost 5000 × 15.6ns = 78µs (when `kcp.output()` has no packets to send); for 10MB/s data transfer with a 1400-byte MTU, `kcp.output()` is called around 7500 times and costs 117µs of `time.Now()` in **every second**.
+
+
+## Tuning
+
+Q: I'm handling >5K connections on my server and the CPU utilization is high.
+
+A: A standalone `agent` or `gate` server for kcp-go is suggested, not only to reduce CPU utilization but also to preserve the **precision** of RTT measurements, which indirectly affects retransmission. Increasing the update `interval` with `SetNoDelay`, e.g. `conn.SetNoDelay(1, 40, 1, 1)`, will dramatically reduce system load.
+
+## Who is using this?
+
+1. https://github.com/xtaci/kcptun -- A Secure Tunnel Based On KCP over UDP.
+2. https://github.com/getlantern/lantern -- Lantern delivers fast access to the open Internet. 
+3. https://github.com/smallnest/rpcx -- An RPC service framework based on net/rpc, like Alibaba Dubbo and Weibo Motan.
+4. https://github.com/gonet2/agent -- A gateway for games with stream multiplexing.
+5. https://github.com/syncthing/syncthing -- Open Source Continuous File Synchronization.
+6. https://play.google.com/store/apps/details?id=com.k17game.k3 -- Battle Zone - Earth 2048, a world-wide strategy game.
+
+## Links
+
+1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++
+2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol
+3. https://github.com/templexxx/reedsolomon -- Reed-Solomon Erasure Coding in Go

+ 288 - 0
vendor/github.com/fatedier/kcp-go/crypt.go

@@ -0,0 +1,288 @@
+package kcp
+
+import (
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/des"
+	"crypto/sha1"
+
+	"github.com/templexxx/xor"
+	"github.com/tjfoc/gmsm/sm4"
+
+	"golang.org/x/crypto/blowfish"
+	"golang.org/x/crypto/cast5"
+	"golang.org/x/crypto/pbkdf2"
+	"golang.org/x/crypto/salsa20"
+	"golang.org/x/crypto/tea"
+	"golang.org/x/crypto/twofish"
+	"golang.org/x/crypto/xtea"
+)
+
+var (
+	initialVector = []byte{167, 115, 79, 156, 18, 172, 27, 1, 164, 21, 242, 193, 252, 120, 230, 107}
+	saltxor       = `sH3CIVoF#rWLtJo6`
+)
+
+// BlockCrypt defines encryption/decryption methods for a given byte slice.
+// Notes on implementing: the data to be encrypted contains a builtin
+// nonce at the first 16 bytes
+type BlockCrypt interface {
+	// Encrypt encrypts the whole block in src into dst.
+	// Dst and src may point at the same memory.
+	Encrypt(dst, src []byte)
+
+	// Decrypt decrypts the whole block in src into dst.
+	// Dst and src may point at the same memory.
+	Decrypt(dst, src []byte)
+}
+
+type salsa20BlockCrypt struct {
+	key [32]byte
+}
+
+// NewSalsa20BlockCrypt https://en.wikipedia.org/wiki/Salsa20
+func NewSalsa20BlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(salsa20BlockCrypt)
+	copy(c.key[:], key)
+	return c, nil
+}
+
+func (c *salsa20BlockCrypt) Encrypt(dst, src []byte) {
+	salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
+	copy(dst[:8], src[:8])
+}
+func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) {
+	salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
+	copy(dst[:8], src[:8])
+}
+
+type sm4BlockCrypt struct {
+	encbuf []byte
+	decbuf []byte
+	block  cipher.Block
+}
+
+// NewSM4BlockCrypt https://github.com/tjfoc/gmsm/tree/master/sm4
+func NewSM4BlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(sm4BlockCrypt)
+	block, err := sm4.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	c.block = block
+	c.encbuf = make([]byte, sm4.BlockSize)
+	c.decbuf = make([]byte, 2*sm4.BlockSize)
+	return c, nil
+}
+
+func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+
+type twofishBlockCrypt struct {
+	encbuf []byte
+	decbuf []byte
+	block  cipher.Block
+}
+
+// NewTwofishBlockCrypt https://en.wikipedia.org/wiki/Twofish
+func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(twofishBlockCrypt)
+	block, err := twofish.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	c.block = block
+	c.encbuf = make([]byte, twofish.BlockSize)
+	c.decbuf = make([]byte, 2*twofish.BlockSize)
+	return c, nil
+}
+
+func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+
+type tripleDESBlockCrypt struct {
+	encbuf []byte
+	decbuf []byte
+	block  cipher.Block
+}
+
+// NewTripleDESBlockCrypt https://en.wikipedia.org/wiki/Triple_DES
+func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(tripleDESBlockCrypt)
+	block, err := des.NewTripleDESCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	c.block = block
+	c.encbuf = make([]byte, des.BlockSize)
+	c.decbuf = make([]byte, 2*des.BlockSize)
+	return c, nil
+}
+
+func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+
+type cast5BlockCrypt struct {
+	encbuf []byte
+	decbuf []byte
+	block  cipher.Block
+}
+
+// NewCast5BlockCrypt https://en.wikipedia.org/wiki/CAST-128
+func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(cast5BlockCrypt)
+	block, err := cast5.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	c.block = block
+	c.encbuf = make([]byte, cast5.BlockSize)
+	c.decbuf = make([]byte, 2*cast5.BlockSize)
+	return c, nil
+}
+
+func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+
+type blowfishBlockCrypt struct {
+	encbuf []byte
+	decbuf []byte
+	block  cipher.Block
+}
+
+// NewBlowfishBlockCrypt https://en.wikipedia.org/wiki/Blowfish_(cipher)
+func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(blowfishBlockCrypt)
+	block, err := blowfish.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	c.block = block
+	c.encbuf = make([]byte, blowfish.BlockSize)
+	c.decbuf = make([]byte, 2*blowfish.BlockSize)
+	return c, nil
+}
+
+func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+
+type aesBlockCrypt struct {
+	encbuf []byte
+	decbuf []byte
+	block  cipher.Block
+}
+
+// NewAESBlockCrypt https://en.wikipedia.org/wiki/Advanced_Encryption_Standard
+func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(aesBlockCrypt)
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	c.block = block
+	c.encbuf = make([]byte, aes.BlockSize)
+	c.decbuf = make([]byte, 2*aes.BlockSize)
+	return c, nil
+}
+
+func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+
+type teaBlockCrypt struct {
+	encbuf []byte
+	decbuf []byte
+	block  cipher.Block
+}
+
+// NewTEABlockCrypt https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm
+func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(teaBlockCrypt)
+	block, err := tea.NewCipherWithRounds(key, 16)
+	if err != nil {
+		return nil, err
+	}
+	c.block = block
+	c.encbuf = make([]byte, tea.BlockSize)
+	c.decbuf = make([]byte, 2*tea.BlockSize)
+	return c, nil
+}
+
+func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+
+type xteaBlockCrypt struct {
+	encbuf []byte
+	decbuf []byte
+	block  cipher.Block
+}
+
+// NewXTEABlockCrypt https://en.wikipedia.org/wiki/XTEA
+func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(xteaBlockCrypt)
+	block, err := xtea.NewCipher(key)
+	if err != nil {
+		return nil, err
+	}
+	c.block = block
+	c.encbuf = make([]byte, xtea.BlockSize)
+	c.decbuf = make([]byte, 2*xtea.BlockSize)
+	return c, nil
+}
+
+func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+
+type simpleXORBlockCrypt struct {
+	xortbl []byte
+}
+
+// NewSimpleXORBlockCrypt simple xor with key expanding
+func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) {
+	c := new(simpleXORBlockCrypt)
+	c.xortbl = pbkdf2.Key(key, []byte(saltxor), 32, mtuLimit, sha1.New)
+	return c, nil
+}
+
+func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
+func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
+
+type noneBlockCrypt struct{}
+
+// NewNoneBlockCrypt does nothing but copying
+func NewNoneBlockCrypt(key []byte) (BlockCrypt, error) {
+	return new(noneBlockCrypt), nil
+}
+
+func (c *noneBlockCrypt) Encrypt(dst, src []byte) { copy(dst, src) }
+func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) }
+
+// packet encryption with local CFB mode
+func encrypt(block cipher.Block, dst, src, buf []byte) {
+	blocksize := block.BlockSize()
+	tbl := buf[:blocksize]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / blocksize
+	base := 0
+	for i := 0; i < n; i++ {
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+	}
+	xor.BytesSrc0(dst[base:], src[base:], tbl)
+}
+
+func decrypt(block cipher.Block, dst, src, buf []byte) {
+	blocksize := block.BlockSize()
+	tbl := buf[:blocksize]
+	next := buf[blocksize:]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / blocksize
+	base := 0
+	for i := 0; i < n; i++ {
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+	}
+	xor.BytesSrc0(dst[base:], src[base:], tbl)
+}
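
A small usage sketch of the `BlockCrypt` implementations above (assumes a 16-byte key, which selects AES-128; per the interface comment, the first 16 bytes of each buffer serve as the builtin nonce):

```go
package main

import (
	"crypto/rand"
	"fmt"

	kcp "github.com/fatedier/kcp-go"
)

func main() {
	key := make([]byte, 16) // a 16-byte key selects AES-128
	rand.Read(key)

	crypt, err := kcp.NewAESBlockCrypt(key)
	if err != nil {
		panic(err)
	}

	// Per the BlockCrypt contract, the first 16 bytes of the buffer act
	// as a builtin nonce, so they are randomized per packet.
	packet := make([]byte, 16+11)
	rand.Read(packet[:16])
	copy(packet[16:], "hello world")

	crypt.Encrypt(packet, packet) // dst and src may point at the same memory
	crypt.Decrypt(packet, packet)
	fmt.Printf("%s\n", packet[16:]) // prints "hello world"
}
```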

BIN
vendor/github.com/fatedier/kcp-go/donate.png


+ 303 - 0
vendor/github.com/fatedier/kcp-go/fec.go

@@ -0,0 +1,303 @@
+package kcp
+
+import (
+	"encoding/binary"
+	"sync/atomic"
+
+	"github.com/templexxx/reedsolomon"
+)
+
+const (
+	fecHeaderSize      = 6
+	fecHeaderSizePlus2 = fecHeaderSize + 2 // plus 2B data size
+	typeData           = 0xf1
+	typeFEC            = 0xf2
+)
+
+type (
+	// fecPacket is a decoded FEC packet
+	fecPacket struct {
+		seqid uint32
+		flag  uint16
+		data  []byte
+	}
+
+	// fecDecoder for decoding incoming packets
+	fecDecoder struct {
+		rxlimit      int // queue size limit
+		dataShards   int
+		parityShards int
+		shardSize    int
+		rx           []fecPacket // ordered receive queue
+
+		// caches
+		decodeCache [][]byte
+		flagCache   []bool
+
+		// RS decoder
+		codec reedsolomon.Encoder
+	}
+)
+
+func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
+	if dataShards <= 0 || parityShards <= 0 {
+		return nil
+	}
+	if rxlimit < dataShards+parityShards {
+		return nil
+	}
+
+	fec := new(fecDecoder)
+	fec.rxlimit = rxlimit
+	fec.dataShards = dataShards
+	fec.parityShards = parityShards
+	fec.shardSize = dataShards + parityShards
+	enc, err := reedsolomon.New(dataShards, parityShards)
+	if err != nil {
+		return nil
+	}
+	fec.codec = enc
+	fec.decodeCache = make([][]byte, fec.shardSize)
+	fec.flagCache = make([]bool, fec.shardSize)
+	return fec
+}
+
+// decodeBytes a fec packet
+func (dec *fecDecoder) decodeBytes(data []byte) fecPacket {
+	var pkt fecPacket
+	pkt.seqid = binary.LittleEndian.Uint32(data)
+	pkt.flag = binary.LittleEndian.Uint16(data[4:])
+	// allocate memory & copy
+	buf := xmitBuf.Get().([]byte)[:len(data)-fecHeaderSize]
+	copy(buf, data[fecHeaderSize:])
+	pkt.data = buf
+	return pkt
+}
+
+// decode a FEC packet, returning any data shards recovered as a result
+func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
+	// insertion
+	n := len(dec.rx) - 1
+	insertIdx := 0
+	for i := n; i >= 0; i-- {
+		if pkt.seqid == dec.rx[i].seqid { // de-duplicate
+			xmitBuf.Put(pkt.data)
+			return nil
+		} else if _itimediff(pkt.seqid, dec.rx[i].seqid) > 0 { // insertion
+			insertIdx = i + 1
+			break
+		}
+	}
+
+	// insert into ordered rx queue
+	if insertIdx == n+1 {
+		dec.rx = append(dec.rx, pkt)
+	} else {
+		dec.rx = append(dec.rx, fecPacket{})
+		copy(dec.rx[insertIdx+1:], dec.rx[insertIdx:]) // shift right
+		dec.rx[insertIdx] = pkt
+	}
+
+	// shard range for current packet
+	shardBegin := pkt.seqid - pkt.seqid%uint32(dec.shardSize)
+	shardEnd := shardBegin + uint32(dec.shardSize) - 1
+
+	// max search range in ordered queue for current shard
+	searchBegin := insertIdx - int(pkt.seqid%uint32(dec.shardSize))
+	if searchBegin < 0 {
+		searchBegin = 0
+	}
+	searchEnd := searchBegin + dec.shardSize - 1
+	if searchEnd >= len(dec.rx) {
+		searchEnd = len(dec.rx) - 1
+	}
+
+	// re-construct datashards
+	if searchEnd-searchBegin+1 >= dec.dataShards {
+		var numshard, numDataShard, first, maxlen int
+
+		// zero cache
+		shards := dec.decodeCache
+		shardsflag := dec.flagCache
+		for k := range dec.decodeCache {
+			shards[k] = nil
+			shardsflag[k] = false
+		}
+
+		// shard assembly
+		for i := searchBegin; i <= searchEnd; i++ {
+			seqid := dec.rx[i].seqid
+			if _itimediff(seqid, shardEnd) > 0 {
+				break
+			} else if _itimediff(seqid, shardBegin) >= 0 {
+				shards[seqid%uint32(dec.shardSize)] = dec.rx[i].data
+				shardsflag[seqid%uint32(dec.shardSize)] = true
+				numshard++
+				if dec.rx[i].flag == typeData {
+					numDataShard++
+				}
+				if numshard == 1 {
+					first = i
+				}
+				if len(dec.rx[i].data) > maxlen {
+					maxlen = len(dec.rx[i].data)
+				}
+			}
+		}
+
+		if numDataShard == dec.dataShards {
+			// case 1: no data shards lost
+			dec.rx = dec.freeRange(first, numshard, dec.rx)
+		} else if numshard >= dec.dataShards {
+			// case 2: data shards lost, but recoverable from parity shards
+			for k := range shards {
+				if shards[k] != nil {
+					dlen := len(shards[k])
+					shards[k] = shards[k][:maxlen]
+					xorBytes(shards[k][dlen:], shards[k][dlen:], shards[k][dlen:])
+				}
+			}
+			if err := dec.codec.ReconstructData(shards); err == nil {
+				for k := range shards[:dec.dataShards] {
+					if !shardsflag[k] {
+						recovered = append(recovered, shards[k])
+					}
+				}
+			}
+			dec.rx = dec.freeRange(first, numshard, dec.rx)
+		}
+	}
+
+	// keep rxlimit
+	if len(dec.rx) > dec.rxlimit {
+		if dec.rx[0].flag == typeData { // record unrecoverable data
+			atomic.AddUint64(&DefaultSnmp.FECShortShards, 1)
+		}
+		dec.rx = dec.freeRange(0, 1, dec.rx)
+	}
+	return
+}
+
+// freeRange frees a range of fecPackets and zeroes them for GC recycling
+func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
+	for i := first; i < first+n; i++ { // free
+		xmitBuf.Put(q[i].data)
+	}
+	copy(q[first:], q[first+n:])
+	for i := 0; i < n; i++ { // dereference data
+		q[len(q)-1-i].data = nil
+	}
+	return q[:len(q)-n]
+}
+
+type (
+	// fecEncoder for encoding outgoing packets
+	fecEncoder struct {
+		dataShards   int
+		parityShards int
+		shardSize    int
+		paws         uint32 // Protect Against Wrapped Sequence numbers
+		next         uint32 // next seqid
+
+		shardCount int // count the number of datashards collected
+		maxSize    int // record maximum data length in datashard
+
+		headerOffset  int // FEC header offset
+		payloadOffset int // FEC payload offset
+
+		// caches
+		shardCache  [][]byte
+		encodeCache [][]byte
+
+		// RS encoder
+		codec reedsolomon.Encoder
+	}
+)
+
+func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
+	if dataShards <= 0 || parityShards <= 0 {
+		return nil
+	}
+	fec := new(fecEncoder)
+	fec.dataShards = dataShards
+	fec.parityShards = parityShards
+	fec.shardSize = dataShards + parityShards
+	fec.paws = (0xffffffff/uint32(fec.shardSize) - 1) * uint32(fec.shardSize)
+	fec.headerOffset = offset
+	fec.payloadOffset = fec.headerOffset + fecHeaderSize
+
+	enc, err := reedsolomon.New(dataShards, parityShards)
+	if err != nil {
+		return nil
+	}
+	fec.codec = enc
+
+	// caches
+	fec.encodeCache = make([][]byte, fec.shardSize)
+	fec.shardCache = make([][]byte, fec.shardSize)
+	for k := range fec.shardCache {
+		fec.shardCache[k] = make([]byte, mtuLimit)
+	}
+	return fec
+}
+
+// encode the packet and output parity shards once enough data shards
+// have been collected; the contents of the returned parity shards will
+// change on the next call to encode
+func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
+	enc.markData(b[enc.headerOffset:])
+	binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
+
+	// copy data to fec datashards
+	sz := len(b)
+	enc.shardCache[enc.shardCount] = enc.shardCache[enc.shardCount][:sz]
+	copy(enc.shardCache[enc.shardCount], b)
+	enc.shardCount++
+
+	// record max datashard length
+	if sz > enc.maxSize {
+		enc.maxSize = sz
+	}
+
+	// calculate Reed-Solomon erasure code
+	if enc.shardCount == enc.dataShards {
+		// bzero each datashard's tail
+		for i := 0; i < enc.dataShards; i++ {
+			shard := enc.shardCache[i]
+			slen := len(shard)
+			xorBytes(shard[slen:enc.maxSize], shard[slen:enc.maxSize], shard[slen:enc.maxSize])
+		}
+
+		// construct equal-sized slice with stripped header
+		cache := enc.encodeCache
+		for k := range cache {
+			cache[k] = enc.shardCache[k][enc.payloadOffset:enc.maxSize]
+		}
+
+		// rs encode
+		if err := enc.codec.Encode(cache); err == nil {
+			ps = enc.shardCache[enc.dataShards:]
+			for k := range ps {
+				enc.markFEC(ps[k][enc.headerOffset:])
+				ps[k] = ps[k][:enc.maxSize]
+			}
+		}
+
+		// reset counters to zero
+		enc.shardCount = 0
+		enc.maxSize = 0
+	}
+
+	return
+}
+
+func (enc *fecEncoder) markData(data []byte) {
+	binary.LittleEndian.PutUint32(data, enc.next)
+	binary.LittleEndian.PutUint16(data[4:], typeData)
+	enc.next++
+}
+
+func (enc *fecEncoder) markFEC(data []byte) {
+	binary.LittleEndian.PutUint32(data, enc.next)
+	binary.LittleEndian.PutUint16(data[4:], typeFEC)
+	enc.next = (enc.next + 1) % enc.paws
+}
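+
+// Example (editor's sketch, not upstream code): wiring the encoder and the
+// decoder together with 10 data shards and 3 parity shards. The queue limit
+// follows the rxFECMulti convention used by newUDPSession in sess.go; pkt
+// and raw are illustrative buffers with the proper header space.
+//
+//	enc := newFECEncoder(10, 3, 0)
+//	dec := newFECDecoder(3*(10+3), 10, 3)
+//	// sender: mark each outgoing packet; once 10 data shards have been
+//	// collected, encode returns 3 parity shards to transmit as well
+//	parity := enc.encode(pkt)
+//	// receiver: parse the FEC header, then feed the packet to the decoder,
+//	// which returns any data shards it managed to reconstruct
+//	recovered := dec.decode(dec.decodeBytes(raw))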

BIN
vendor/github.com/fatedier/kcp-go/frame.png


BIN
vendor/github.com/fatedier/kcp-go/kcp-go.png


+ 998 - 0
vendor/github.com/fatedier/kcp-go/kcp.go

@@ -0,0 +1,998 @@
+// Package kcp - A Fast and Reliable ARQ Protocol
+package kcp
+
+import (
+	"encoding/binary"
+	"sync/atomic"
+)
+
+const (
+	IKCP_RTO_NDL     = 30  // no delay min rto
+	IKCP_RTO_MIN     = 100 // normal min rto
+	IKCP_RTO_DEF     = 200
+	IKCP_RTO_MAX     = 60000
+	IKCP_CMD_PUSH    = 81 // cmd: push data
+	IKCP_CMD_ACK     = 82 // cmd: ack
+	IKCP_CMD_WASK    = 83 // cmd: window probe (ask)
+	IKCP_CMD_WINS    = 84 // cmd: window size (tell)
+	IKCP_ASK_SEND    = 1  // need to send IKCP_CMD_WASK
+	IKCP_ASK_TELL    = 2  // need to send IKCP_CMD_WINS
+	IKCP_WND_SND     = 32
+	IKCP_WND_RCV     = 32
+	IKCP_MTU_DEF     = 1400
+	IKCP_ACK_FAST    = 3
+	IKCP_INTERVAL    = 100
+	IKCP_OVERHEAD    = 24
+	IKCP_DEADLINK    = 20
+	IKCP_THRESH_INIT = 2
+	IKCP_THRESH_MIN  = 2
+	IKCP_PROBE_INIT  = 7000   // 7 secs to probe window size
+	IKCP_PROBE_LIMIT = 120000 // up to 120 secs to probe window
+)
+
+// output_callback is a prototype which ought to capture conn and call conn.Write
+type output_callback func(buf []byte, size int)
+
+/* encode 8 bits unsigned int */
+func ikcp_encode8u(p []byte, c byte) []byte {
+	p[0] = c
+	return p[1:]
+}
+
+/* decode 8 bits unsigned int */
+func ikcp_decode8u(p []byte, c *byte) []byte {
+	*c = p[0]
+	return p[1:]
+}
+
+/* encode 16 bits unsigned int (lsb) */
+func ikcp_encode16u(p []byte, w uint16) []byte {
+	binary.LittleEndian.PutUint16(p, w)
+	return p[2:]
+}
+
+/* decode 16 bits unsigned int (lsb) */
+func ikcp_decode16u(p []byte, w *uint16) []byte {
+	*w = binary.LittleEndian.Uint16(p)
+	return p[2:]
+}
+
+/* encode 32 bits unsigned int (lsb) */
+func ikcp_encode32u(p []byte, l uint32) []byte {
+	binary.LittleEndian.PutUint32(p, l)
+	return p[4:]
+}
+
+/* decode 32 bits unsigned int (lsb) */
+func ikcp_decode32u(p []byte, l *uint32) []byte {
+	*l = binary.LittleEndian.Uint32(p)
+	return p[4:]
+}
+
+func _imin_(a, b uint32) uint32 {
+	if a <= b {
+		return a
+	}
+	return b
+}
+
+func _imax_(a, b uint32) uint32 {
+	if a >= b {
+		return a
+	}
+	return b
+}
+
+func _ibound_(lower, middle, upper uint32) uint32 {
+	return _imin_(_imax_(lower, middle), upper)
+}
+
+// _itimediff returns the signed difference later-earlier; the cast to int32
+// makes it robust to uint32 sequence-number wraparound
+func _itimediff(later, earlier uint32) int32 {
+	return (int32)(later - earlier)
+}
+
+// segment defines a KCP segment
+type segment struct {
+	conv     uint32
+	cmd      uint8
+	frg      uint8
+	wnd      uint16
+	ts       uint32
+	sn       uint32
+	una      uint32
+	rto      uint32
+	xmit     uint32
+	resendts uint32
+	fastack  uint32
+	data     []byte
+}
+
+// encode a segment into buffer
+func (seg *segment) encode(ptr []byte) []byte {
+	ptr = ikcp_encode32u(ptr, seg.conv)
+	ptr = ikcp_encode8u(ptr, seg.cmd)
+	ptr = ikcp_encode8u(ptr, seg.frg)
+	ptr = ikcp_encode16u(ptr, seg.wnd)
+	ptr = ikcp_encode32u(ptr, seg.ts)
+	ptr = ikcp_encode32u(ptr, seg.sn)
+	ptr = ikcp_encode32u(ptr, seg.una)
+	ptr = ikcp_encode32u(ptr, uint32(len(seg.data)))
+	atomic.AddUint64(&DefaultSnmp.OutSegs, 1)
+	return ptr
+}
+
+// KCP defines a single KCP connection
+type KCP struct {
+	conv, mtu, mss, state                  uint32
+	snd_una, snd_nxt, rcv_nxt              uint32
+	ssthresh                               uint32
+	rx_rttvar, rx_srtt                     int32
+	rx_rto, rx_minrto                      uint32
+	snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe uint32
+	interval, ts_flush                     uint32
+	nodelay, updated                       uint32
+	ts_probe, probe_wait                   uint32
+	dead_link, incr                        uint32
+
+	fastresend     int32
+	nocwnd, stream int32
+
+	snd_queue []segment
+	rcv_queue []segment
+	snd_buf   []segment
+	rcv_buf   []segment
+
+	acklist []ackItem
+
+	buffer []byte
+	output output_callback
+}
+
+type ackItem struct {
+	sn uint32
+	ts uint32
+}
+
+// NewKCP creates a new KCP control object. 'conv' must be equal on both
+// endpoints of the same connection.
+func NewKCP(conv uint32, output output_callback) *KCP {
+	kcp := new(KCP)
+	kcp.conv = conv
+	kcp.snd_wnd = IKCP_WND_SND
+	kcp.rcv_wnd = IKCP_WND_RCV
+	kcp.rmt_wnd = IKCP_WND_RCV
+	kcp.mtu = IKCP_MTU_DEF
+	kcp.mss = kcp.mtu - IKCP_OVERHEAD
+	kcp.buffer = make([]byte, (kcp.mtu+IKCP_OVERHEAD)*3)
+	kcp.rx_rto = IKCP_RTO_DEF
+	kcp.rx_minrto = IKCP_RTO_MIN
+	kcp.interval = IKCP_INTERVAL
+	kcp.ts_flush = IKCP_INTERVAL
+	kcp.ssthresh = IKCP_THRESH_INIT
+	kcp.dead_link = IKCP_DEADLINK
+	kcp.output = output
+	return kcp
+}
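+
+// Example (editor's sketch, not upstream code): a minimal loopback pair of
+// KCP objects whose output callbacks feed each other directly; 'conv' is 1
+// on both sides, as required.
+//
+//	var k2 *KCP
+//	k1 := NewKCP(1, func(buf []byte, size int) { k2.Input(buf[:size], true, false) })
+//	k2 = NewKCP(1, func(buf []byte, size int) { k1.Input(buf[:size], true, false) })
+//	k1.NoDelay(1, 10, 2, 1) // nc=1 disables congestion control, so the first flush sends
+//	k1.Send([]byte("ping"))
+//	k1.Update() // flush pushes the segment through the output callback
+//	buf := make([]byte, 1500)
+//	if n := k2.Recv(buf); n > 0 {
+//		// buf[:n] now holds "ping"
+//	}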
+
+// newSegment creates a KCP segment
+func (kcp *KCP) newSegment(size int) (seg segment) {
+	seg.data = xmitBuf.Get().([]byte)[:size]
+	return
+}
+
+// delSegment recycles a KCP segment
+func (kcp *KCP) delSegment(seg segment) {
+	xmitBuf.Put(seg.data)
+}
+
+// PeekSize checks the size of the next message in the recv queue
+func (kcp *KCP) PeekSize() (length int) {
+	if len(kcp.rcv_queue) == 0 {
+		return -1
+	}
+
+	seg := &kcp.rcv_queue[0]
+	if seg.frg == 0 {
+		return len(seg.data)
+	}
+
+	if len(kcp.rcv_queue) < int(seg.frg+1) {
+		return -1
+	}
+
+	for k := range kcp.rcv_queue {
+		seg := &kcp.rcv_queue[k]
+		length += len(seg.data)
+		if seg.frg == 0 {
+			break
+		}
+	}
+	return
+}
+
+// Recv is the user/upper-level recv: returns the message size, or a value below zero for EAGAIN
+func (kcp *KCP) Recv(buffer []byte) (n int) {
+	if len(kcp.rcv_queue) == 0 {
+		return -1
+	}
+
+	peeksize := kcp.PeekSize()
+	if peeksize < 0 {
+		return -2
+	}
+
+	if peeksize > len(buffer) {
+		return -3
+	}
+
+	var fast_recover bool
+	if len(kcp.rcv_queue) >= int(kcp.rcv_wnd) {
+		fast_recover = true
+	}
+
+	// merge fragment
+	count := 0
+	for k := range kcp.rcv_queue {
+		seg := &kcp.rcv_queue[k]
+		copy(buffer, seg.data)
+		buffer = buffer[len(seg.data):]
+		n += len(seg.data)
+		count++
+		kcp.delSegment(*seg)
+		if seg.frg == 0 {
+			break
+		}
+	}
+	if count > 0 {
+		kcp.rcv_queue = kcp.remove_front(kcp.rcv_queue, count)
+	}
+
+	// move available data from rcv_buf -> rcv_queue
+	count = 0
+	for k := range kcp.rcv_buf {
+		seg := &kcp.rcv_buf[k]
+		if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
+			kcp.rcv_nxt++
+			count++
+		} else {
+			break
+		}
+	}
+
+	if count > 0 {
+		kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
+		kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
+	}
+
+	// fast recover
+	if len(kcp.rcv_queue) < int(kcp.rcv_wnd) && fast_recover {
+		// ready to send back IKCP_CMD_WINS in ikcp_flush
+		// tell remote my window size
+		kcp.probe |= IKCP_ASK_TELL
+	}
+	return
+}
+
+// Send is the user/upper-level send; returns a value below zero on error
+func (kcp *KCP) Send(buffer []byte) int {
+	var count int
+	if len(buffer) == 0 {
+		return -1
+	}
+
+	// append to previous segment in streaming mode (if possible)
+	if kcp.stream != 0 {
+		n := len(kcp.snd_queue)
+		if n > 0 {
+			seg := &kcp.snd_queue[n-1]
+			if len(seg.data) < int(kcp.mss) {
+				capacity := int(kcp.mss) - len(seg.data)
+				extend := capacity
+				if len(buffer) < capacity {
+					extend = len(buffer)
+				}
+
+				// grow slice, the underlying cap is guaranteed to
+				// be larger than kcp.mss
+				oldlen := len(seg.data)
+				seg.data = seg.data[:oldlen+extend]
+				copy(seg.data[oldlen:], buffer)
+				buffer = buffer[extend:]
+			}
+		}
+
+		if len(buffer) == 0 {
+			return 0
+		}
+	}
+
+	if len(buffer) <= int(kcp.mss) {
+		count = 1
+	} else {
+		count = (len(buffer) + int(kcp.mss) - 1) / int(kcp.mss)
+	}
+
+	if count > 255 {
+		return -2
+	}
+
+	if count == 0 {
+		count = 1
+	}
+
+	for i := 0; i < count; i++ {
+		var size int
+		if len(buffer) > int(kcp.mss) {
+			size = int(kcp.mss)
+		} else {
+			size = len(buffer)
+		}
+		seg := kcp.newSegment(size)
+		copy(seg.data, buffer[:size])
+		if kcp.stream == 0 { // message mode
+			seg.frg = uint8(count - i - 1)
+		} else { // stream mode
+			seg.frg = 0
+		}
+		kcp.snd_queue = append(kcp.snd_queue, seg)
+		buffer = buffer[size:]
+	}
+	return 0
+}
+
+func (kcp *KCP) update_ack(rtt int32) {
+	// https://tools.ietf.org/html/rfc6298
+	var rto uint32
+	if kcp.rx_srtt == 0 {
+		kcp.rx_srtt = rtt
+		kcp.rx_rttvar = rtt >> 1
+	} else {
+		delta := rtt - kcp.rx_srtt
+		kcp.rx_srtt += delta >> 3
+		if delta < 0 {
+			delta = -delta
+		}
+		if rtt < kcp.rx_srtt-kcp.rx_rttvar {
+			// if the new RTT sample is below the bottom of the expected range,
+			// give it an 8x reduced weight versus its normal weighting
+			kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 5
+		} else {
+			kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 2
+		}
+	}
+	rto = uint32(kcp.rx_srtt) + _imax_(kcp.interval, uint32(kcp.rx_rttvar)<<2)
+	kcp.rx_rto = _ibound_(kcp.rx_minrto, rto, IKCP_RTO_MAX)
+}
+
+func (kcp *KCP) shrink_buf() {
+	if len(kcp.snd_buf) > 0 {
+		seg := &kcp.snd_buf[0]
+		kcp.snd_una = seg.sn
+	} else {
+		kcp.snd_una = kcp.snd_nxt
+	}
+}
+
+func (kcp *KCP) parse_ack(sn uint32) {
+	if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
+		return
+	}
+
+	for k := range kcp.snd_buf {
+		seg := &kcp.snd_buf[k]
+		if sn == seg.sn {
+			kcp.delSegment(*seg)
+			copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
+			kcp.snd_buf[len(kcp.snd_buf)-1] = segment{}
+			kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
+			break
+		}
+		if _itimediff(sn, seg.sn) < 0 {
+			break
+		}
+	}
+}
+
+func (kcp *KCP) parse_fastack(sn uint32) {
+	if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
+		return
+	}
+
+	for k := range kcp.snd_buf {
+		seg := &kcp.snd_buf[k]
+		if _itimediff(sn, seg.sn) < 0 {
+			break
+		} else if sn != seg.sn {
+			seg.fastack++
+		}
+	}
+}
+
+func (kcp *KCP) parse_una(una uint32) {
+	count := 0
+	for k := range kcp.snd_buf {
+		seg := &kcp.snd_buf[k]
+		if _itimediff(una, seg.sn) > 0 {
+			kcp.delSegment(*seg)
+			count++
+		} else {
+			break
+		}
+	}
+	if count > 0 {
+		kcp.snd_buf = kcp.remove_front(kcp.snd_buf, count)
+	}
+}
+
+// ack_push appends an ACK for (sn, ts) to the acklist
+func (kcp *KCP) ack_push(sn, ts uint32) {
+	kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
+}
+
+func (kcp *KCP) parse_data(newseg segment) {
+	sn := newseg.sn
+	if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
+		_itimediff(sn, kcp.rcv_nxt) < 0 {
+		kcp.delSegment(newseg)
+		return
+	}
+
+	n := len(kcp.rcv_buf) - 1
+	insert_idx := 0
+	repeat := false
+	for i := n; i >= 0; i-- {
+		seg := &kcp.rcv_buf[i]
+		if seg.sn == sn {
+			repeat = true
+			atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
+			break
+		}
+		if _itimediff(sn, seg.sn) > 0 {
+			insert_idx = i + 1
+			break
+		}
+	}
+
+	if !repeat {
+		if insert_idx == n+1 {
+			kcp.rcv_buf = append(kcp.rcv_buf, newseg)
+		} else {
+			kcp.rcv_buf = append(kcp.rcv_buf, segment{})
+			copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
+			kcp.rcv_buf[insert_idx] = newseg
+		}
+	} else {
+		kcp.delSegment(newseg)
+	}
+
+	// move available data from rcv_buf -> rcv_queue
+	count := 0
+	for k := range kcp.rcv_buf {
+		seg := &kcp.rcv_buf[k]
+		if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
+			kcp.rcv_nxt++
+			count++
+		} else {
+			break
+		}
+	}
+	if count > 0 {
+		kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
+		kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
+	}
+}
+
+// Input is called when a low-level packet (e.g. a UDP packet) is received;
+// 'regular' indicates the packet arrived directly rather than being recovered from FEC
+func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
+	una := kcp.snd_una
+	if len(data) < IKCP_OVERHEAD {
+		return -1
+	}
+
+	var maxack uint32
+	var lastackts uint32
+	var flag int
+	var inSegs uint64
+
+	for {
+		var ts, sn, length, una, conv uint32
+		var wnd uint16
+		var cmd, frg uint8
+
+		if len(data) < int(IKCP_OVERHEAD) {
+			break
+		}
+
+		data = ikcp_decode32u(data, &conv)
+		if conv != kcp.conv {
+			return -1
+		}
+
+		data = ikcp_decode8u(data, &cmd)
+		data = ikcp_decode8u(data, &frg)
+		data = ikcp_decode16u(data, &wnd)
+		data = ikcp_decode32u(data, &ts)
+		data = ikcp_decode32u(data, &sn)
+		data = ikcp_decode32u(data, &una)
+		data = ikcp_decode32u(data, &length)
+		if len(data) < int(length) {
+			return -2
+		}
+
+		if cmd != IKCP_CMD_PUSH && cmd != IKCP_CMD_ACK &&
+			cmd != IKCP_CMD_WASK && cmd != IKCP_CMD_WINS {
+			return -3
+		}
+
+		// only trust window updates from regular packets, i.e. the latest update
+		if regular {
+			kcp.rmt_wnd = uint32(wnd)
+		}
+		kcp.parse_una(una)
+		kcp.shrink_buf()
+
+		if cmd == IKCP_CMD_ACK {
+			kcp.parse_ack(sn)
+			kcp.shrink_buf()
+			if flag == 0 {
+				flag = 1
+				maxack = sn
+			} else if _itimediff(sn, maxack) > 0 {
+				maxack = sn
+			}
+			lastackts = ts
+		} else if cmd == IKCP_CMD_PUSH {
+			if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
+				kcp.ack_push(sn, ts)
+				if _itimediff(sn, kcp.rcv_nxt) >= 0 {
+					seg := kcp.newSegment(int(length))
+					seg.conv = conv
+					seg.cmd = cmd
+					seg.frg = frg
+					seg.wnd = wnd
+					seg.ts = ts
+					seg.sn = sn
+					seg.una = una
+					copy(seg.data, data[:length])
+					kcp.parse_data(seg)
+				} else {
+					atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
+				}
+			} else {
+				atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
+			}
+		} else if cmd == IKCP_CMD_WASK {
+			// ready to send back IKCP_CMD_WINS in Ikcp_flush
+			// tell remote my window size
+			kcp.probe |= IKCP_ASK_TELL
+		} else if cmd == IKCP_CMD_WINS {
+			// do nothing
+		} else {
+			return -3
+		}
+
+		inSegs++
+		data = data[length:]
+	}
+	atomic.AddUint64(&DefaultSnmp.InSegs, inSegs)
+
+	if flag != 0 && regular {
+		kcp.parse_fastack(maxack)
+		current := currentMs()
+		if _itimediff(current, lastackts) >= 0 {
+			kcp.update_ack(_itimediff(current, lastackts))
+		}
+	}
+
+	if _itimediff(kcp.snd_una, una) > 0 {
+		if kcp.cwnd < kcp.rmt_wnd {
+			mss := kcp.mss
+			if kcp.cwnd < kcp.ssthresh {
+				kcp.cwnd++
+				kcp.incr += mss
+			} else {
+				if kcp.incr < mss {
+					kcp.incr = mss
+				}
+				kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
+				if (kcp.cwnd+1)*mss <= kcp.incr {
+					kcp.cwnd++
+				}
+			}
+			if kcp.cwnd > kcp.rmt_wnd {
+				kcp.cwnd = kcp.rmt_wnd
+				kcp.incr = kcp.rmt_wnd * mss
+			}
+		}
+	}
+
+	if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately
+		kcp.flush(true)
+	} else if kcp.rmt_wnd == 0 && len(kcp.acklist) > 0 { // window zero
+		kcp.flush(true)
+	}
+	return 0
+}
+
+func (kcp *KCP) wnd_unused() uint16 {
+	if len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
+		return uint16(int(kcp.rcv_wnd) - len(kcp.rcv_queue))
+	}
+	return 0
+}
+
+// flush pending data
+func (kcp *KCP) flush(ackOnly bool) {
+	var seg segment
+	seg.conv = kcp.conv
+	seg.cmd = IKCP_CMD_ACK
+	seg.wnd = kcp.wnd_unused()
+	seg.una = kcp.rcv_nxt
+
+	buffer := kcp.buffer
+	// flush acknowledges
+	ptr := buffer
+	for i, ack := range kcp.acklist {
+		size := len(buffer) - len(ptr)
+		if size+IKCP_OVERHEAD > int(kcp.mtu) {
+			kcp.output(buffer, size)
+			ptr = buffer
+		}
+		// filter jitters caused by bufferbloat
+		if ack.sn >= kcp.rcv_nxt || len(kcp.acklist)-1 == i {
+			seg.sn, seg.ts = ack.sn, ack.ts
+			ptr = seg.encode(ptr)
+		}
+	}
+	kcp.acklist = kcp.acklist[0:0]
+
+	if ackOnly { // flush remaining ACK segments
+		size := len(buffer) - len(ptr)
+		if size > 0 {
+			kcp.output(buffer, size)
+		}
+		return
+	}
+
+	// probe window size (if remote window size equals zero)
+	if kcp.rmt_wnd == 0 {
+		current := currentMs()
+		if kcp.probe_wait == 0 {
+			kcp.probe_wait = IKCP_PROBE_INIT
+			kcp.ts_probe = current + kcp.probe_wait
+		} else {
+			if _itimediff(current, kcp.ts_probe) >= 0 {
+				if kcp.probe_wait < IKCP_PROBE_INIT {
+					kcp.probe_wait = IKCP_PROBE_INIT
+				}
+				kcp.probe_wait += kcp.probe_wait / 2
+				if kcp.probe_wait > IKCP_PROBE_LIMIT {
+					kcp.probe_wait = IKCP_PROBE_LIMIT
+				}
+				kcp.ts_probe = current + kcp.probe_wait
+				kcp.probe |= IKCP_ASK_SEND
+			}
+		}
+	} else {
+		kcp.ts_probe = 0
+		kcp.probe_wait = 0
+	}
+
+	// flush window probing commands
+	if (kcp.probe & IKCP_ASK_SEND) != 0 {
+		seg.cmd = IKCP_CMD_WASK
+		size := len(buffer) - len(ptr)
+		if size+IKCP_OVERHEAD > int(kcp.mtu) {
+			kcp.output(buffer, size)
+			ptr = buffer
+		}
+		ptr = seg.encode(ptr)
+	}
+
+	// flush window probing commands
+	// flush window size telling commands (IKCP_ASK_TELL)
+		seg.cmd = IKCP_CMD_WINS
+		size := len(buffer) - len(ptr)
+		if size+IKCP_OVERHEAD > int(kcp.mtu) {
+			kcp.output(buffer, size)
+			ptr = buffer
+		}
+		ptr = seg.encode(ptr)
+	}
+
+	kcp.probe = 0
+
+	// calculate window size
+	cwnd := _imin_(kcp.snd_wnd, kcp.rmt_wnd)
+	if kcp.nocwnd == 0 {
+		cwnd = _imin_(kcp.cwnd, cwnd)
+	}
+
+	// sliding window, controlled by snd_nxt && snd_una+cwnd
+	newSegsCount := 0
+	for k := range kcp.snd_queue {
+		if _itimediff(kcp.snd_nxt, kcp.snd_una+cwnd) >= 0 {
+			break
+		}
+		newseg := kcp.snd_queue[k]
+		newseg.conv = kcp.conv
+		newseg.cmd = IKCP_CMD_PUSH
+		newseg.sn = kcp.snd_nxt
+		kcp.snd_buf = append(kcp.snd_buf, newseg)
+		kcp.snd_nxt++
+		newSegsCount++
+		kcp.snd_queue[k].data = nil
+	}
+	if newSegsCount > 0 {
+		kcp.snd_queue = kcp.remove_front(kcp.snd_queue, newSegsCount)
+	}
+
+	// calculate the fast-resend (fastack) threshold
+	resent := uint32(kcp.fastresend)
+	if kcp.fastresend <= 0 {
+		resent = 0xffffffff
+	}
+
+	// check for retransmissions
+	current := currentMs()
+	var change, lost, lostSegs, fastRetransSegs, earlyRetransSegs uint64
+	for k := range kcp.snd_buf {
+		segment := &kcp.snd_buf[k]
+		needsend := false
+		if segment.xmit == 0 { // initial transmit
+			needsend = true
+			segment.rto = kcp.rx_rto
+			segment.resendts = current + segment.rto
+		} else if _itimediff(current, segment.resendts) >= 0 { // RTO
+			needsend = true
+			if kcp.nodelay == 0 {
+				segment.rto += kcp.rx_rto
+			} else {
+				segment.rto += kcp.rx_rto / 2
+			}
+			segment.resendts = current + segment.rto
+			lost++
+			lostSegs++
+		} else if segment.fastack >= resent { // fast retransmit
+			needsend = true
+			segment.fastack = 0
+			segment.rto = kcp.rx_rto
+			segment.resendts = current + segment.rto
+			change++
+			fastRetransSegs++
+		} else if segment.fastack > 0 && newSegsCount == 0 { // early retransmit
+			needsend = true
+			segment.fastack = 0
+			segment.rto = kcp.rx_rto
+			segment.resendts = current + segment.rto
+			change++
+			earlyRetransSegs++
+		}
+
+		if needsend {
+			segment.xmit++
+			segment.ts = current
+			segment.wnd = seg.wnd
+			segment.una = seg.una
+
+			size := len(buffer) - len(ptr)
+			need := IKCP_OVERHEAD + len(segment.data)
+
+			if size+need > int(kcp.mtu) {
+				kcp.output(buffer, size)
+				current = currentMs() // time update for a blocking call
+				ptr = buffer
+			}
+
+			ptr = segment.encode(ptr)
+			copy(ptr, segment.data)
+			ptr = ptr[len(segment.data):]
+
+			if segment.xmit >= kcp.dead_link {
+				kcp.state = 0xFFFFFFFF
+			}
+		}
+	}
+
+	// flush remaining segments
+	size := len(buffer) - len(ptr)
+	if size > 0 {
+		kcp.output(buffer, size)
+	}
+
+	// counter updates
+	sum := lostSegs
+	if lostSegs > 0 {
+		atomic.AddUint64(&DefaultSnmp.LostSegs, lostSegs)
+	}
+	if fastRetransSegs > 0 {
+		atomic.AddUint64(&DefaultSnmp.FastRetransSegs, fastRetransSegs)
+		sum += fastRetransSegs
+	}
+	if earlyRetransSegs > 0 {
+		atomic.AddUint64(&DefaultSnmp.EarlyRetransSegs, earlyRetransSegs)
+		sum += earlyRetransSegs
+	}
+	if sum > 0 {
+		atomic.AddUint64(&DefaultSnmp.RetransSegs, sum)
+	}
+
+	// update ssthresh
+	// rate halving, https://tools.ietf.org/html/rfc6937
+	if change > 0 {
+		inflight := kcp.snd_nxt - kcp.snd_una
+		kcp.ssthresh = inflight / 2
+		if kcp.ssthresh < IKCP_THRESH_MIN {
+			kcp.ssthresh = IKCP_THRESH_MIN
+		}
+		kcp.cwnd = kcp.ssthresh + resent
+		kcp.incr = kcp.cwnd * kcp.mss
+	}
+
+	// congestion control, https://tools.ietf.org/html/rfc5681
+	if lost > 0 {
+		kcp.ssthresh = cwnd / 2
+		if kcp.ssthresh < IKCP_THRESH_MIN {
+			kcp.ssthresh = IKCP_THRESH_MIN
+		}
+		kcp.cwnd = 1
+		kcp.incr = kcp.mss
+	}
+
+	if kcp.cwnd < 1 {
+		kcp.cwnd = 1
+		kcp.incr = kcp.mss
+	}
+}
+
+// Update updates state (call it repeatedly, every 10ms-100ms), or you can
+// ask Check when to call it again (when there is no Input/Send in between).
+func (kcp *KCP) Update() {
+	var slap int32
+
+	current := currentMs()
+	if kcp.updated == 0 {
+		kcp.updated = 1
+		kcp.ts_flush = current
+	}
+
+	slap = _itimediff(current, kcp.ts_flush)
+
+	if slap >= 10000 || slap < -10000 {
+		kcp.ts_flush = current
+		slap = 0
+	}
+
+	if slap >= 0 {
+		kcp.ts_flush += kcp.interval
+		if _itimediff(current, kcp.ts_flush) >= 0 {
+			kcp.ts_flush = current + kcp.interval
+		}
+		kcp.flush(false)
+	}
+}
+
+// Check determines when you should invoke Update:
+// it returns the time (in millisec) at which Update should next be called,
+// assuming there is no Input/Send in between. You can call Update at that
+// time instead of calling it repeatedly.
+// This is important for reducing unnecessary Update invocations; use it to
+// schedule Update (e.g. when implementing an epoll-like mechanism, or to
+// optimize Update when handling massive numbers of kcp connections).
+func (kcp *KCP) Check() uint32 {
+	current := currentMs()
+	ts_flush := kcp.ts_flush
+	tm_flush := int32(0x7fffffff)
+	tm_packet := int32(0x7fffffff)
+	minimal := uint32(0)
+	if kcp.updated == 0 {
+		return current
+	}
+
+	if _itimediff(current, ts_flush) >= 10000 ||
+		_itimediff(current, ts_flush) < -10000 {
+		ts_flush = current
+	}
+
+	if _itimediff(current, ts_flush) >= 0 {
+		return current
+	}
+
+	tm_flush = _itimediff(ts_flush, current)
+
+	for k := range kcp.snd_buf {
+		seg := &kcp.snd_buf[k]
+		diff := _itimediff(seg.resendts, current)
+		if diff <= 0 {
+			return current
+		}
+		if diff < tm_packet {
+			tm_packet = diff
+		}
+	}
+
+	minimal = uint32(tm_packet)
+	if tm_packet >= tm_flush {
+		minimal = uint32(tm_flush)
+	}
+	if minimal >= kcp.interval {
+		minimal = kcp.interval
+	}
+
+	return current + minimal
+}
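+
+// Example (editor's sketch, not upstream code): driving a KCP object from a
+// loop, using Check to sleep until the next required Update instead of
+// polling at a fixed interval; kcp is an illustrative *KCP value.
+//
+//	for {
+//		kcp.Update()
+//		next := kcp.Check()
+//		time.Sleep(time.Duration(_itimediff(next, currentMs())) * time.Millisecond)
+//	}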
+
+// SetMtu changes MTU size, default is 1400
+func (kcp *KCP) SetMtu(mtu int) int {
+	if mtu < 50 || mtu < IKCP_OVERHEAD {
+		return -1
+	}
+	buffer := make([]byte, (mtu+IKCP_OVERHEAD)*3) // make never returns nil, so no error path is needed
+	kcp.mtu = uint32(mtu)
+	kcp.mss = kcp.mtu - IKCP_OVERHEAD
+	kcp.buffer = buffer
+	return 0
+}
+
+// NoDelay options
+// fastest: kcp.NoDelay(1, 20, 2, 1)
+// nodelay: 0:disable(default), 1:enable
+// interval: internal update timer interval in millisec, default is 100ms
+// resend: 0:disable fast resend(default), 1:enable fast resend
+// nc: 0:normal congestion control(default), 1:disable congestion control
+func (kcp *KCP) NoDelay(nodelay, interval, resend, nc int) int {
+	if nodelay >= 0 {
+		kcp.nodelay = uint32(nodelay)
+		if nodelay != 0 {
+			kcp.rx_minrto = IKCP_RTO_NDL
+		} else {
+			kcp.rx_minrto = IKCP_RTO_MIN
+		}
+	}
+	if interval >= 0 {
+		if interval > 5000 {
+			interval = 5000
+		} else if interval < 10 {
+			interval = 10
+		}
+		kcp.interval = uint32(interval)
+	}
+	if resend >= 0 {
+		kcp.fastresend = int32(resend)
+	}
+	if nc >= 0 {
+		kcp.nocwnd = int32(nc)
+	}
+	return 0
+}
+
+// WndSize sets maximum window size: sndwnd=32, rcvwnd=32 by default
+func (kcp *KCP) WndSize(sndwnd, rcvwnd int) int {
+	if sndwnd > 0 {
+		kcp.snd_wnd = uint32(sndwnd)
+	}
+	if rcvwnd > 0 {
+		kcp.rcv_wnd = uint32(rcvwnd)
+	}
+	return 0
+}
+
+// WaitSnd gets how many packets are waiting to be sent
+func (kcp *KCP) WaitSnd() int {
+	return len(kcp.snd_buf) + len(kcp.snd_queue)
+}
+
+// remove_front removes the front n elements from the queue
+func (kcp *KCP) remove_front(q []segment, n int) []segment {
+	newn := copy(q, q[n:])
+	for i := newn; i < len(q); i++ {
+		q[i] = segment{} // manually clear references for GC
+	}
+	return q[:newn]
+}

+ 975 - 0
vendor/github.com/fatedier/kcp-go/sess.go

@@ -0,0 +1,975 @@
+package kcp
+
+import (
+	"crypto/rand"
+	"encoding/binary"
+	"hash/crc32"
+	"io"
+	"net"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/pkg/errors"
+	"golang.org/x/net/ipv4"
+)
+
+type errTimeout struct {
+	error
+}
+
+func (errTimeout) Timeout() bool   { return true }
+func (errTimeout) Temporary() bool { return true }
+func (errTimeout) Error() string   { return "i/o timeout" }
+
+const (
+	// 16-byte nonce for each packet
+	nonceSize = 16
+
+	// 4-byte packet checksum (CRC32)
+	crcSize = 4
+
+	// overall crypto header size
+	cryptHeaderSize = nonceSize + crcSize
+
+	// maximum packet size
+	mtuLimit = 1500
+
+	// FEC keeps rxFECMulti* (dataShard+parityShard) ordered packets in memory
+	rxFECMulti = 3
+
+	// accept backlog
+	acceptBacklog = 128
+
+	// pre-routing (to-session) queue length
+	qlen = 128
+)
+
+const (
+	errBrokenPipe       = "broken pipe"
+	errInvalidOperation = "invalid operation"
+)
+
+var (
+	// global packet buffer
+	// shared among sending/receiving/FEC
+	xmitBuf sync.Pool
+)
+
+func init() {
+	xmitBuf.New = func() interface{} {
+		return make([]byte, mtuLimit)
+	}
+}
+
+type (
+	// UDPSession defines a KCP session implemented by UDP
+	UDPSession struct {
+		updaterIdx int            // record slice index in updater
+		conn       net.PacketConn // the underlying packet connection
+		kcp        *KCP           // KCP ARQ protocol
+		l          *Listener      // point to the Listener if it's accepted by Listener
+		block      BlockCrypt     // block encryption
+
+		// kcp receiving is based on packets
+		// recvbuf turns packets into stream
+		recvbuf []byte
+		bufptr  []byte
+		// extended output buffer(with header)
+		ext []byte
+
+		// FEC
+		fecDecoder *fecDecoder
+		fecEncoder *fecEncoder
+
+		// settings
+		remote     net.Addr  // remote peer address
+		rd         time.Time // read deadline
+		wd         time.Time // write deadline
+		headerSize int       // the overall header size added before KCP frame
+		ackNoDelay bool      // send ack immediately for each incoming packet
+		writeDelay bool      // delay kcp.flush() for Write() for bulk transfer
+		dup        int       // duplicate udp packets
+
+		// notifications
+		die          chan struct{} // notify that the session has closed
+		chReadEvent  chan struct{} // notify Read() can be called without blocking
+		chWriteEvent chan struct{} // notify Write() can be called without blocking
+		chErrorEvent chan error    // notify Read() of an error
+
+		isClosed bool // flag that the session has closed
+		mu       sync.Mutex
+	}
+
+	setReadBuffer interface {
+		SetReadBuffer(bytes int) error
+	}
+
+	setWriteBuffer interface {
+		SetWriteBuffer(bytes int) error
+	}
+)
+
+// newUDPSession creates a new UDP session for client or server
+func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
+	sess := new(UDPSession)
+	sess.die = make(chan struct{})
+	sess.chReadEvent = make(chan struct{}, 1)
+	sess.chWriteEvent = make(chan struct{}, 1)
+	sess.chErrorEvent = make(chan error, 1)
+	sess.remote = remote
+	sess.conn = conn
+	sess.l = l
+	sess.block = block
+	sess.recvbuf = make([]byte, mtuLimit)
+
+	// FEC initialization
+	sess.fecDecoder = newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
+	if sess.block != nil {
+		sess.fecEncoder = newFECEncoder(dataShards, parityShards, cryptHeaderSize)
+	} else {
+		sess.fecEncoder = newFECEncoder(dataShards, parityShards, 0)
+	}
+
+	// calculate header size
+	if sess.block != nil {
+		sess.headerSize += cryptHeaderSize
+	}
+	if sess.fecEncoder != nil {
+		sess.headerSize += fecHeaderSizePlus2
+	}
+
+	// only allocate extended packet buffer
+	// when the extra header is required
+	if sess.headerSize > 0 {
+		sess.ext = make([]byte, mtuLimit)
+	}
+
+	sess.kcp = NewKCP(conv, func(buf []byte, size int) {
+		if size >= IKCP_OVERHEAD {
+			sess.output(buf[:size])
+		}
+	})
+	sess.kcp.SetMtu(IKCP_MTU_DEF - sess.headerSize)
+
+	// add current session to the global updater,
+	// which periodically calls sess.update()
+	updater.addSession(sess)
+
+	if sess.l == nil { // it's a client connection
+		go sess.readLoop()
+		atomic.AddUint64(&DefaultSnmp.ActiveOpens, 1)
+	} else {
+		atomic.AddUint64(&DefaultSnmp.PassiveOpens, 1)
+	}
+	currestab := atomic.AddUint64(&DefaultSnmp.CurrEstab, 1)
+	maxconn := atomic.LoadUint64(&DefaultSnmp.MaxConn)
+	if currestab > maxconn {
+		atomic.CompareAndSwapUint64(&DefaultSnmp.MaxConn, maxconn, currestab)
+	}
+
+	return sess
+}
+
+// Read implements net.Conn
+func (s *UDPSession) Read(b []byte) (n int, err error) {
+	for {
+		s.mu.Lock()
+		if len(s.bufptr) > 0 { // copy from buffer into b
+			n = copy(b, s.bufptr)
+			s.bufptr = s.bufptr[n:]
+			s.mu.Unlock()
+			return n, nil
+		}
+
+		if s.isClosed {
+			s.mu.Unlock()
+			return 0, errors.New(errBrokenPipe)
+		}
+
+		if size := s.kcp.PeekSize(); size > 0 { // peek data size from kcp
+			atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(size))
+			if len(b) >= size { // direct write to b
+				s.kcp.Recv(b)
+				s.mu.Unlock()
+				return size, nil
+			}
+
+			// resize kcp receive buffer
+			// to make sure recvbuf has enough capacity
+			if cap(s.recvbuf) < size {
+				s.recvbuf = make([]byte, size)
+			}
+
+			// resize recvbuf slice length
+			s.recvbuf = s.recvbuf[:size]
+			s.kcp.Recv(s.recvbuf)
+			n = copy(b, s.recvbuf)   // copy to b
+			s.bufptr = s.recvbuf[n:] // update pointer
+			s.mu.Unlock()
+			return n, nil
+		}
+
+		// read deadline
+		var timeout *time.Timer
+		var c <-chan time.Time
+		if !s.rd.IsZero() {
+			if time.Now().After(s.rd) {
+				s.mu.Unlock()
+				return 0, errTimeout{}
+			}
+
+			delay := s.rd.Sub(time.Now())
+			timeout = time.NewTimer(delay)
+			c = timeout.C
+		}
+		s.mu.Unlock()
+
+		// wait for read event or timeout
+		select {
+		case <-s.chReadEvent:
+		case <-c:
+		case <-s.die:
+		case err = <-s.chErrorEvent:
+			if timeout != nil {
+				timeout.Stop()
+			}
+			return n, err
+		}
+
+		if timeout != nil {
+			timeout.Stop()
+		}
+	}
+}
+
+// Write implements net.Conn
+func (s *UDPSession) Write(b []byte) (n int, err error) {
+	for {
+		s.mu.Lock()
+		if s.isClosed {
+			s.mu.Unlock()
+			return 0, errors.New(errBrokenPipe)
+		}
+
+		// api flow control
+		if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
+			n = len(b)
+			for {
+				if len(b) <= int(s.kcp.mss) {
+					s.kcp.Send(b)
+					break
+				} else {
+					s.kcp.Send(b[:s.kcp.mss])
+					b = b[s.kcp.mss:]
+				}
+			}
+
+			if !s.writeDelay {
+				s.kcp.flush(false)
+			}
+			s.mu.Unlock()
+			atomic.AddUint64(&DefaultSnmp.BytesSent, uint64(n))
+			return n, nil
+		}
+
+		// write deadline
+		var timeout *time.Timer
+		var c <-chan time.Time
+		if !s.wd.IsZero() {
+			if time.Now().After(s.wd) {
+				s.mu.Unlock()
+				return 0, errTimeout{}
+			}
+			delay := s.wd.Sub(time.Now())
+			timeout = time.NewTimer(delay)
+			c = timeout.C
+		}
+		s.mu.Unlock()
+
+		// wait for write event or timeout
+		select {
+		case <-s.chWriteEvent:
+		case <-c:
+		case <-s.die:
+		}
+
+		if timeout != nil {
+			timeout.Stop()
+		}
+	}
+}
+
+// Close closes the connection.
+func (s *UDPSession) Close() error {
+	// remove this session from updater & listener(if necessary)
+	updater.removeSession(s)
+	if s.l != nil { // notify listener
+		s.l.closeSession(sessionKey{
+			addr:   s.remote.String(),
+			convID: s.kcp.conv,
+		})
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.isClosed {
+		return errors.New(errBrokenPipe)
+	}
+	close(s.die)
+	s.isClosed = true
+	atomic.AddUint64(&DefaultSnmp.CurrEstab, ^uint64(0))
+	if s.l == nil { // client socket close
+		return s.conn.Close()
+	}
+	return nil
+}
+
+// LocalAddr returns the local network address. The Addr returned is shared by all invocations of LocalAddr, so do not modify it.
+func (s *UDPSession) LocalAddr() net.Addr { return s.conn.LocalAddr() }
+
+// RemoteAddr returns the remote network address. The Addr returned is shared by all invocations of RemoteAddr, so do not modify it.
+func (s *UDPSession) RemoteAddr() net.Addr { return s.remote }
+
+// SetDeadline sets the deadline associated with the listener. A zero time value disables the deadline.
+func (s *UDPSession) SetDeadline(t time.Time) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.rd = t
+	s.wd = t
+	return nil
+}
+
+// SetReadDeadline implements the Conn SetReadDeadline method.
+func (s *UDPSession) SetReadDeadline(t time.Time) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.rd = t
+	return nil
+}
+
+// SetWriteDeadline implements the Conn SetWriteDeadline method.
+func (s *UDPSession) SetWriteDeadline(t time.Time) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.wd = t
+	return nil
+}
+
+// SetWriteDelay delays write for bulk transfer until the next update interval
+func (s *UDPSession) SetWriteDelay(delay bool) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.writeDelay = delay
+}
+
+// SetWindowSize sets the maximum window size
+func (s *UDPSession) SetWindowSize(sndwnd, rcvwnd int) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.kcp.WndSize(sndwnd, rcvwnd)
+}
+
+// SetMtu sets the maximum transmission unit (not including the UDP header)
+func (s *UDPSession) SetMtu(mtu int) bool {
+	if mtu > mtuLimit {
+		return false
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.kcp.SetMtu(mtu - s.headerSize)
+	return true
+}
+
+// SetStreamMode toggles the stream mode on/off
+func (s *UDPSession) SetStreamMode(enable bool) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if enable {
+		s.kcp.stream = 1
+	} else {
+		s.kcp.stream = 0
+	}
+}
+
+// SetACKNoDelay changes the ACK flush option; set true to flush ACKs immediately.
+func (s *UDPSession) SetACKNoDelay(nodelay bool) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.ackNoDelay = nodelay
+}
+
+// SetDUP duplicates UDP packets for kcp output, for testing purposes only
+func (s *UDPSession) SetDUP(dup int) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.dup = dup
+}
+
+// SetNoDelay calls nodelay() of kcp
+// https://github.com/skywind3000/kcp/blob/master/README.en.md#protocol-configuration
+func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	s.kcp.NoDelay(nodelay, interval, resend, nc)
+}
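+
+// Example (editor's sketch, not upstream code): typical tuning applied to a
+// session right after it is created; the values are illustrative.
+//
+//	sess.SetStreamMode(true)
+//	sess.SetWindowSize(128, 128)
+//	sess.SetNoDelay(1, 20, 2, 1) // "fastest" profile, as documented above
+//	sess.SetACKNoDelay(true)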
+
+// SetDSCP sets the 6-bit DSCP field of the IP header; no effect if the session was accepted from a Listener
+func (s *UDPSession) SetDSCP(dscp int) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.l == nil {
+		if nc, ok := s.conn.(*connectedUDPConn); ok {
+			return ipv4.NewConn(nc.UDPConn).SetTOS(dscp << 2)
+		} else if nc, ok := s.conn.(net.Conn); ok {
+			return ipv4.NewConn(nc).SetTOS(dscp << 2)
+		}
+	}
+	return errors.New(errInvalidOperation)
+}
+
+// SetReadBuffer sets the socket read buffer, no effect if it's accepted from Listener
+func (s *UDPSession) SetReadBuffer(bytes int) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.l == nil {
+		if nc, ok := s.conn.(setReadBuffer); ok {
+			return nc.SetReadBuffer(bytes)
+		}
+	}
+	return errors.New(errInvalidOperation)
+}
+
+// SetWriteBuffer sets the socket write buffer, no effect if it's accepted from Listener
+func (s *UDPSession) SetWriteBuffer(bytes int) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.l == nil {
+		if nc, ok := s.conn.(setWriteBuffer); ok {
+			return nc.SetWriteBuffer(bytes)
+		}
+	}
+	return errors.New(errInvalidOperation)
+}
+
+// output pipeline entry
+// steps for output data processing:
+// 0. Header extension
+// 1. FEC
+// 2. CRC32
+// 3. Encryption
+// 4. WriteTo kernel
+func (s *UDPSession) output(buf []byte) {
+	var ecc [][]byte
+
+	// 0. extend buf's header space(if necessary)
+	ext := buf
+	if s.headerSize > 0 {
+		ext = s.ext[:s.headerSize+len(buf)]
+		copy(ext[s.headerSize:], buf)
+	}
+
+	// 1. FEC encoding
+	if s.fecEncoder != nil {
+		ecc = s.fecEncoder.encode(ext)
+	}
+
+	// 2&3. crc32 & encryption
+	if s.block != nil {
+		io.ReadFull(rand.Reader, ext[:nonceSize])
+		checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
+		binary.LittleEndian.PutUint32(ext[nonceSize:], checksum)
+		s.block.Encrypt(ext, ext)
+
+		for k := range ecc {
+			io.ReadFull(rand.Reader, ecc[k][:nonceSize])
+			checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
+			binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
+			s.block.Encrypt(ecc[k], ecc[k])
+		}
+	}
+
+	// 4. WriteTo kernel
+	nbytes := 0
+	npkts := 0
+	for i := 0; i < s.dup+1; i++ {
+		if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
+			nbytes += n
+			npkts++
+		}
+	}
+
+	for k := range ecc {
+		if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
+			nbytes += n
+			npkts++
+		}
+	}
+	atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
+	atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes))
+}
+
+// update flushes kcp state and returns the interval until the next call
+func (s *UDPSession) update() (interval time.Duration) {
+	s.mu.Lock()
+	s.kcp.flush(false)
+	if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
+		s.notifyWriteEvent()
+	}
+	interval = time.Duration(s.kcp.interval) * time.Millisecond
+	s.mu.Unlock()
+	return
+}
+
+// GetConv gets the conversation id of a session
+func (s *UDPSession) GetConv() uint32 { return s.kcp.conv }
+
+func (s *UDPSession) notifyReadEvent() {
+	select {
+	case s.chReadEvent <- struct{}{}:
+	default:
+	}
+}
+
+func (s *UDPSession) notifyWriteEvent() {
+	select {
+	case s.chWriteEvent <- struct{}{}:
+	default:
+	}
+}
+
+func (s *UDPSession) kcpInput(data []byte) {
+	var kcpInErrors, fecErrs, fecRecovered, fecParityShards uint64
+
+	if s.fecDecoder != nil {
+		f := s.fecDecoder.decodeBytes(data)
+		s.mu.Lock()
+		if f.flag == typeData {
+			if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true, s.ackNoDelay); ret != 0 {
+				kcpInErrors++
+			}
+		}
+
+		if f.flag == typeData || f.flag == typeFEC {
+			if f.flag == typeFEC {
+				fecParityShards++
+			}
+
+			recovers := s.fecDecoder.decode(f)
+			for _, r := range recovers {
+				if len(r) >= 2 { // must carry at least the 2-byte size header
+					sz := binary.LittleEndian.Uint16(r)
+					if int(sz) <= len(r) && sz >= 2 {
+						if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
+							fecRecovered++
+						} else {
+							kcpInErrors++
+						}
+					} else {
+						fecErrs++
+					}
+				} else {
+					fecErrs++
+				}
+			}
+		}
+
+		// notify reader
+		if n := s.kcp.PeekSize(); n > 0 {
+			s.notifyReadEvent()
+		}
+		s.mu.Unlock()
+	} else {
+		s.mu.Lock()
+		if ret := s.kcp.Input(data, true, s.ackNoDelay); ret != 0 {
+			kcpInErrors++
+		}
+		// notify reader
+		if n := s.kcp.PeekSize(); n > 0 {
+			s.notifyReadEvent()
+		}
+		s.mu.Unlock()
+	}
+
+	atomic.AddUint64(&DefaultSnmp.InPkts, 1)
+	atomic.AddUint64(&DefaultSnmp.InBytes, uint64(len(data)))
+	if fecParityShards > 0 {
+		atomic.AddUint64(&DefaultSnmp.FECParityShards, fecParityShards)
+	}
+	if kcpInErrors > 0 {
+		atomic.AddUint64(&DefaultSnmp.KCPInErrors, kcpInErrors)
+	}
+	if fecErrs > 0 {
+		atomic.AddUint64(&DefaultSnmp.FECErrs, fecErrs)
+	}
+	if fecRecovered > 0 {
+		atomic.AddUint64(&DefaultSnmp.FECRecovered, fecRecovered)
+	}
+}
+
+func (s *UDPSession) receiver(ch chan<- []byte) {
+	for {
+		data := xmitBuf.Get().([]byte)[:mtuLimit]
+		if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD {
+			select {
+			case ch <- data[:n]:
+			case <-s.die:
+				return
+			}
+		} else if err != nil {
+			s.chErrorEvent <- err
+			return
+		} else {
+			atomic.AddUint64(&DefaultSnmp.InErrs, 1)
+		}
+	}
+}
+
+// read loop for client session
+func (s *UDPSession) readLoop() {
+	chPacket := make(chan []byte, qlen)
+	go s.receiver(chPacket)
+
+	for {
+		select {
+		case data := <-chPacket:
+			raw := data
+			dataValid := false
+			if s.block != nil {
+				s.block.Decrypt(data, data)
+				data = data[nonceSize:]
+				checksum := crc32.ChecksumIEEE(data[crcSize:])
+				if checksum == binary.LittleEndian.Uint32(data) {
+					data = data[crcSize:]
+					dataValid = true
+				} else {
+					atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
+				}
+			} else if s.block == nil {
+				dataValid = true
+			}
+
+			if dataValid {
+				s.kcpInput(data)
+			}
+			xmitBuf.Put(raw)
+		case <-s.die:
+			return
+		}
+	}
+}
+
+type (
+	sessionKey struct {
+		addr   string
+		convID uint32
+	}
+
+	// Listener defines a server listening for connections
+	Listener struct {
+		block        BlockCrypt     // block encryption
+		dataShards   int            // FEC data shard
+		parityShards int            // FEC parity shard
+		fecDecoder   *fecDecoder    // FEC decoder, used here only for header detection
+		conn         net.PacketConn // the underlying packet connection
+
+		sessions        map[sessionKey]*UDPSession // all sessions accepted by this Listener
+		chAccepts       chan *UDPSession           // Listen() backlog
+		chSessionClosed chan sessionKey            // session close queue
+		headerSize      int                        // the overall header size added before KCP frame
+		die             chan struct{}              // notify the listener has closed
+		rd              atomic.Value               // read deadline for Accept()
+		wd              atomic.Value
+	}
+
+	// incoming packet
+	inPacket struct {
+		from net.Addr
+		data []byte
+	}
+)
+
+// monitor incoming data for all connections of the server
+func (l *Listener) monitor() {
+	// cache last session
+	var lastKey sessionKey
+	var lastSession *UDPSession
+
+	chPacket := make(chan inPacket, qlen)
+	go l.receiver(chPacket)
+	for {
+		select {
+		case p := <-chPacket:
+			raw := p.data
+			data := p.data
+			from := p.from
+			dataValid := false
+			if l.block != nil {
+				l.block.Decrypt(data, data)
+				data = data[nonceSize:]
+				checksum := crc32.ChecksumIEEE(data[crcSize:])
+				if checksum == binary.LittleEndian.Uint32(data) {
+					data = data[crcSize:]
+					dataValid = true
+				} else {
+					atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
+				}
+			} else if l.block == nil {
+				dataValid = true
+			}
+
+			if dataValid {
+				var conv uint32
+				convValid := false
+				if l.fecDecoder != nil {
+					isfec := binary.LittleEndian.Uint16(data[4:])
+					if isfec == typeData {
+						conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
+						convValid = true
+					}
+				} else {
+					conv = binary.LittleEndian.Uint32(data)
+					convValid = true
+				}
+
+				if convValid {
+					key := sessionKey{
+						addr:   from.String(),
+						convID: conv,
+					}
+					var s *UDPSession
+					var ok bool
+
+					// packets received from an address always come in batch.
+					// cache the session for next packet, without querying map.
+					if key == lastKey {
+						s, ok = lastSession, true
+					} else if s, ok = l.sessions[key]; ok {
+						lastSession = s
+						lastKey = key
+					}
+
+					if !ok { // new session
+						if len(l.chAccepts) < cap(l.chAccepts) && len(l.sessions) < 4096 { // do not let new sessions overwhelm the accept queue or the connection count
+							s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block)
+							s.kcpInput(data)
+							l.sessions[key] = s
+							l.chAccepts <- s
+						}
+					} else {
+						s.kcpInput(data)
+					}
+				}
+			}
+
+			xmitBuf.Put(raw)
+		case key := <-l.chSessionClosed:
+			if key == lastKey {
+				lastKey = sessionKey{}
+			}
+			delete(l.sessions, key)
+		case <-l.die:
+			return
+		}
+	}
+}
+
+func (l *Listener) receiver(ch chan<- inPacket) {
+	for {
+		data := xmitBuf.Get().([]byte)[:mtuLimit]
+		if n, from, err := l.conn.ReadFrom(data); err == nil && n >= l.headerSize+IKCP_OVERHEAD {
+			select {
+			case ch <- inPacket{from, data[:n]}:
+			case <-l.die:
+				return
+			}
+		} else if err != nil {
+			return
+		} else {
+			atomic.AddUint64(&DefaultSnmp.InErrs, 1)
+		}
+	}
+}
+
+// SetReadBuffer sets the socket read buffer for the Listener
+func (l *Listener) SetReadBuffer(bytes int) error {
+	if nc, ok := l.conn.(setReadBuffer); ok {
+		return nc.SetReadBuffer(bytes)
+	}
+	return errors.New(errInvalidOperation)
+}
+
+// SetWriteBuffer sets the socket write buffer for the Listener
+func (l *Listener) SetWriteBuffer(bytes int) error {
+	if nc, ok := l.conn.(setWriteBuffer); ok {
+		return nc.SetWriteBuffer(bytes)
+	}
+	return errors.New(errInvalidOperation)
+}
+
+// SetDSCP sets the 6bit DSCP field of IP header
+func (l *Listener) SetDSCP(dscp int) error {
+	if nc, ok := l.conn.(net.Conn); ok {
+		return ipv4.NewConn(nc).SetTOS(dscp << 2)
+	}
+	return errors.New(errInvalidOperation)
+}
+
+// Accept implements the Accept method in the net.Listener interface; it waits for the next connection and returns a generic net.Conn.
+func (l *Listener) Accept() (net.Conn, error) {
+	return l.AcceptKCP()
+}
+
+// AcceptKCP accepts a KCP connection
+func (l *Listener) AcceptKCP() (*UDPSession, error) {
+	var timeout <-chan time.Time
+	if tdeadline, ok := l.rd.Load().(time.Time); ok && !tdeadline.IsZero() {
+		timeout = time.After(tdeadline.Sub(time.Now()))
+	}
+
+	select {
+	case <-timeout:
+		return nil, &errTimeout{}
+	case c := <-l.chAccepts:
+		return c, nil
+	case <-l.die:
+		return nil, errors.New(errBrokenPipe)
+	}
+}
+
+// SetDeadline sets the deadline associated with the listener. A zero time value disables the deadline.
+func (l *Listener) SetDeadline(t time.Time) error {
+	l.SetReadDeadline(t)
+	l.SetWriteDeadline(t)
+	return nil
+}
+
+// SetReadDeadline implements the Conn SetReadDeadline method.
+func (l *Listener) SetReadDeadline(t time.Time) error {
+	l.rd.Store(t)
+	return nil
+}
+
+// SetWriteDeadline implements the Conn SetWriteDeadline method.
+func (l *Listener) SetWriteDeadline(t time.Time) error {
+	l.wd.Store(t)
+	return nil
+}
+
+// Close stops listening on the UDP address. Already accepted connections are not closed.
+func (l *Listener) Close() error {
+	close(l.die)
+	return l.conn.Close()
+}
+
+// closeSession notifies the listener that a session has closed
+func (l *Listener) closeSession(key sessionKey) bool {
+	select {
+	case l.chSessionClosed <- key:
+		return true
+	case <-l.die:
+		return false
+	}
+}
+
+// Addr returns the listener's network address. The Addr returned is shared by all invocations of Addr, so do not modify it.
+func (l *Listener) Addr() net.Addr { return l.conn.LocalAddr() }
+
+// Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp".
+func Listen(laddr string) (net.Listener, error) { return ListenWithOptions(laddr, nil, 0, 0) }
+
+// ListenWithOptions listens for incoming KCP packets addressed to the local address laddr on the network "udp" with packet encryption;
+// dataShards and parityShards define the Reed-Solomon erasure coding parameters
+func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards int) (*Listener, error) {
+	udpaddr, err := net.ResolveUDPAddr("udp", laddr)
+	if err != nil {
+		return nil, errors.Wrap(err, "net.ResolveUDPAddr")
+	}
+	conn, err := net.ListenUDP("udp", udpaddr)
+	if err != nil {
+		return nil, errors.Wrap(err, "net.ListenUDP")
+	}
+
+	return ServeConn(block, dataShards, parityShards, conn)
+}
+
+// ServeConn serves KCP protocol for a single packet connection.
+func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*Listener, error) {
+	l := new(Listener)
+	l.conn = conn
+	l.sessions = make(map[sessionKey]*UDPSession)
+	l.chAccepts = make(chan *UDPSession, acceptBacklog)
+	l.chSessionClosed = make(chan sessionKey)
+	l.die = make(chan struct{})
+	l.dataShards = dataShards
+	l.parityShards = parityShards
+	l.block = block
+	l.fecDecoder = newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
+
+	// calculate header size
+	if l.block != nil {
+		l.headerSize += cryptHeaderSize
+	}
+	if l.fecDecoder != nil {
+		l.headerSize += fecHeaderSizePlus2
+	}
+
+	go l.monitor()
+	return l, nil
+}
+
+// Dial connects to the remote address "raddr" on the network "udp"
+func Dial(raddr string) (net.Conn, error) { return DialWithOptions(raddr, nil, 0, 0) }
+
+// DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
+func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
+	udpaddr, err := net.ResolveUDPAddr("udp", raddr)
+	if err != nil {
+		return nil, errors.Wrap(err, "net.ResolveUDPAddr")
+	}
+
+	udpconn, err := net.DialUDP("udp", nil, udpaddr)
+	if err != nil {
+		return nil, errors.Wrap(err, "net.DialUDP")
+	}
+
+	return NewConn(raddr, block, dataShards, parityShards, &connectedUDPConn{udpconn})
+}
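+
+// Example (editor's sketch, not upstream code): an echo server and client
+// over plain KCP, with encryption disabled (block == nil) and FEC disabled
+// (0 data/parity shards). Error handling is omitted for brevity.
+//
+//	ln, _ := ListenWithOptions("127.0.0.1:10000", nil, 0, 0)
+//	go func() {
+//		conn, _ := ln.AcceptKCP()
+//		io.Copy(conn, conn) // echo every byte back
+//	}()
+//	sess, _ := DialWithOptions("127.0.0.1:10000", nil, 0, 0)
+//	sess.Write([]byte("hello"))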
+
+// NewConn establishes a session and talks KCP protocol over a packet connection.
+func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
+	udpaddr, err := net.ResolveUDPAddr("udp", raddr)
+	if err != nil {
+		return nil, errors.Wrap(err, "net.ResolveUDPAddr")
+	}
+
+	var convid uint32
+	binary.Read(rand.Reader, binary.LittleEndian, &convid)
+	return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil
+}
+
+// NewConnEx establishes a session over an existing *net.UDPConn; if connected
+// is true, the conn is treated as a connected socket and writes use the
+// faster Write syscall instead of WriteTo.
+func NewConnEx(convid uint32, connected bool, raddr string, block BlockCrypt, dataShards, parityShards int, conn *net.UDPConn) (*UDPSession, error) {
+	udpaddr, err := net.ResolveUDPAddr("udp", raddr)
+	if err != nil {
+		return nil, errors.Wrap(err, "net.ResolveUDPAddr")
+	}
+
+	var pConn net.PacketConn = conn
+	if connected {
+		pConn = &connectedUDPConn{conn}
+	}
+
+	return newUDPSession(convid, dataShards, parityShards, nil, pConn, udpaddr, block), nil
+}
+
+// returns current time in milliseconds
+func currentMs() uint32 { return uint32(time.Now().UnixNano() / int64(time.Millisecond)) }
+
+// connectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
+// to Write syscalls that are 4 times faster on some OSes. This should only be
+// used for connections that were produced by a net.Dial* call.
+type connectedUDPConn struct{ *net.UDPConn }
+
+// WriteTo redirects all writes to the Write syscall, which is 4 times faster.
+func (c *connectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) { return c.Write(b) }

+ 164 - 0
vendor/github.com/fatedier/kcp-go/snmp.go

@@ -0,0 +1,164 @@
+package kcp
+
+import (
+	"fmt"
+	"sync/atomic"
+)
+
+// Snmp defines network statistics indicators
+type Snmp struct {
+	BytesSent        uint64 // bytes sent from upper level
+	BytesReceived    uint64 // bytes received to upper level
+	MaxConn          uint64 // max number of connections ever reached
+	ActiveOpens      uint64 // accumulated active open connections
+	PassiveOpens     uint64 // accumulated passive open connections
+	CurrEstab        uint64 // current number of established connections
+	InErrs           uint64 // UDP read errors reported from net.PacketConn
+	InCsumErrors     uint64 // checksum errors from CRC32
+	KCPInErrors      uint64 // packet input errors reported from KCP
+	InPkts           uint64 // incoming packets count
+	OutPkts          uint64 // outgoing packets count
+	InSegs           uint64 // incoming KCP segments
+	OutSegs          uint64 // outgoing KCP segments
+	InBytes          uint64 // UDP bytes received
+	OutBytes         uint64 // UDP bytes sent
+	RetransSegs      uint64 // accumulated retransmitted segments
+	FastRetransSegs  uint64 // accumulated fast retransmitted segments
+	EarlyRetransSegs uint64 // accumulated early retransmitted segments
+	LostSegs         uint64 // number of segments inferred as lost
+	RepeatSegs       uint64 // number of segments duplicated
+	FECRecovered     uint64 // correct packets recovered from FEC
+	FECErrs          uint64 // incorrect packets recovered from FEC
+	FECParityShards  uint64 // FEC segments received
+	FECShortShards   uint64 // number of data shards that are not enough for recovery
+}
+
+func newSnmp() *Snmp {
+	return new(Snmp)
+}
+
+// Header returns all field names
+func (s *Snmp) Header() []string {
+	return []string{
+		"BytesSent",
+		"BytesReceived",
+		"MaxConn",
+		"ActiveOpens",
+		"PassiveOpens",
+		"CurrEstab",
+		"InErrs",
+		"InCsumErrors",
+		"KCPInErrors",
+		"InPkts",
+		"OutPkts",
+		"InSegs",
+		"OutSegs",
+		"InBytes",
+		"OutBytes",
+		"RetransSegs",
+		"FastRetransSegs",
+		"EarlyRetransSegs",
+		"LostSegs",
+		"RepeatSegs",
+		"FECParityShards",
+		"FECErrs",
+		"FECRecovered",
+		"FECShortShards",
+	}
+}
+
+// ToSlice returns current snmp info as slice
+func (s *Snmp) ToSlice() []string {
+	snmp := s.Copy()
+	return []string{
+		fmt.Sprint(snmp.BytesSent),
+		fmt.Sprint(snmp.BytesReceived),
+		fmt.Sprint(snmp.MaxConn),
+		fmt.Sprint(snmp.ActiveOpens),
+		fmt.Sprint(snmp.PassiveOpens),
+		fmt.Sprint(snmp.CurrEstab),
+		fmt.Sprint(snmp.InErrs),
+		fmt.Sprint(snmp.InCsumErrors),
+		fmt.Sprint(snmp.KCPInErrors),
+		fmt.Sprint(snmp.InPkts),
+		fmt.Sprint(snmp.OutPkts),
+		fmt.Sprint(snmp.InSegs),
+		fmt.Sprint(snmp.OutSegs),
+		fmt.Sprint(snmp.InBytes),
+		fmt.Sprint(snmp.OutBytes),
+		fmt.Sprint(snmp.RetransSegs),
+		fmt.Sprint(snmp.FastRetransSegs),
+		fmt.Sprint(snmp.EarlyRetransSegs),
+		fmt.Sprint(snmp.LostSegs),
+		fmt.Sprint(snmp.RepeatSegs),
+		fmt.Sprint(snmp.FECParityShards),
+		fmt.Sprint(snmp.FECErrs),
+		fmt.Sprint(snmp.FECRecovered),
+		fmt.Sprint(snmp.FECShortShards),
+	}
+}
+
+// Copy makes a copy of the current snmp snapshot
+func (s *Snmp) Copy() *Snmp {
+	d := newSnmp()
+	d.BytesSent = atomic.LoadUint64(&s.BytesSent)
+	d.BytesReceived = atomic.LoadUint64(&s.BytesReceived)
+	d.MaxConn = atomic.LoadUint64(&s.MaxConn)
+	d.ActiveOpens = atomic.LoadUint64(&s.ActiveOpens)
+	d.PassiveOpens = atomic.LoadUint64(&s.PassiveOpens)
+	d.CurrEstab = atomic.LoadUint64(&s.CurrEstab)
+	d.InErrs = atomic.LoadUint64(&s.InErrs)
+	d.InCsumErrors = atomic.LoadUint64(&s.InCsumErrors)
+	d.KCPInErrors = atomic.LoadUint64(&s.KCPInErrors)
+	d.InPkts = atomic.LoadUint64(&s.InPkts)
+	d.OutPkts = atomic.LoadUint64(&s.OutPkts)
+	d.InSegs = atomic.LoadUint64(&s.InSegs)
+	d.OutSegs = atomic.LoadUint64(&s.OutSegs)
+	d.InBytes = atomic.LoadUint64(&s.InBytes)
+	d.OutBytes = atomic.LoadUint64(&s.OutBytes)
+	d.RetransSegs = atomic.LoadUint64(&s.RetransSegs)
+	d.FastRetransSegs = atomic.LoadUint64(&s.FastRetransSegs)
+	d.EarlyRetransSegs = atomic.LoadUint64(&s.EarlyRetransSegs)
+	d.LostSegs = atomic.LoadUint64(&s.LostSegs)
+	d.RepeatSegs = atomic.LoadUint64(&s.RepeatSegs)
+	d.FECParityShards = atomic.LoadUint64(&s.FECParityShards)
+	d.FECErrs = atomic.LoadUint64(&s.FECErrs)
+	d.FECRecovered = atomic.LoadUint64(&s.FECRecovered)
+	d.FECShortShards = atomic.LoadUint64(&s.FECShortShards)
+	return d
+}
+
+// Reset values to zero
+func (s *Snmp) Reset() {
+	atomic.StoreUint64(&s.BytesSent, 0)
+	atomic.StoreUint64(&s.BytesReceived, 0)
+	atomic.StoreUint64(&s.MaxConn, 0)
+	atomic.StoreUint64(&s.ActiveOpens, 0)
+	atomic.StoreUint64(&s.PassiveOpens, 0)
+	atomic.StoreUint64(&s.CurrEstab, 0)
+	atomic.StoreUint64(&s.InErrs, 0)
+	atomic.StoreUint64(&s.InCsumErrors, 0)
+	atomic.StoreUint64(&s.KCPInErrors, 0)
+	atomic.StoreUint64(&s.InPkts, 0)
+	atomic.StoreUint64(&s.OutPkts, 0)
+	atomic.StoreUint64(&s.InSegs, 0)
+	atomic.StoreUint64(&s.OutSegs, 0)
+	atomic.StoreUint64(&s.InBytes, 0)
+	atomic.StoreUint64(&s.OutBytes, 0)
+	atomic.StoreUint64(&s.RetransSegs, 0)
+	atomic.StoreUint64(&s.FastRetransSegs, 0)
+	atomic.StoreUint64(&s.EarlyRetransSegs, 0)
+	atomic.StoreUint64(&s.LostSegs, 0)
+	atomic.StoreUint64(&s.RepeatSegs, 0)
+	atomic.StoreUint64(&s.FECParityShards, 0)
+	atomic.StoreUint64(&s.FECErrs, 0)
+	atomic.StoreUint64(&s.FECRecovered, 0)
+	atomic.StoreUint64(&s.FECShortShards, 0)
+}
+
+// DefaultSnmp is the global KCP connection statistics collector
+var DefaultSnmp *Snmp
+
+func init() {
+	DefaultSnmp = newSnmp()
+}
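+
+// An illustrative read of the collector: the names from Header pair up with
+// the values from ToSlice on a copied snapshot (assumes "fmt" is imported), e.g.
+//
+//	snap := kcp.DefaultSnmp.Copy()
+//	vals := snap.ToSlice()
+//	for i, name := range snap.Header() {
+//		fmt.Printf("%s=%s\n", name, vals[i])
+//	}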

+ 105 - 0
vendor/github.com/fatedier/kcp-go/updater.go

@@ -0,0 +1,105 @@
+package kcp
+
+import (
+	"container/heap"
+	"sync"
+	"time"
+)
+
+var updater updateHeap
+
+func init() {
+	updater.init()
+	go updater.updateTask()
+}
+
+// entry contains a session's update info
+type entry struct {
+	ts time.Time
+	s  *UDPSession
+}
+
+// updateHeap is a global min-heap that schedules calls to kcp flush()
+type updateHeap struct {
+	entries  []entry
+	mu       sync.Mutex
+	chWakeUp chan struct{}
+}
+
+func (h *updateHeap) Len() int           { return len(h.entries) }
+func (h *updateHeap) Less(i, j int) bool { return h.entries[i].ts.Before(h.entries[j].ts) }
+func (h *updateHeap) Swap(i, j int) {
+	h.entries[i], h.entries[j] = h.entries[j], h.entries[i]
+	h.entries[i].s.updaterIdx = i
+	h.entries[j].s.updaterIdx = j
+}
+
+func (h *updateHeap) Push(x interface{}) {
+	h.entries = append(h.entries, x.(entry))
+	n := len(h.entries)
+	h.entries[n-1].s.updaterIdx = n - 1
+}
+
+func (h *updateHeap) Pop() interface{} {
+	n := len(h.entries)
+	x := h.entries[n-1]
+	h.entries[n-1].s.updaterIdx = -1
+	h.entries[n-1] = entry{} // zero the slot so the GC can reclaim the session
+	h.entries = h.entries[0 : n-1]
+	return x
+}
+
+func (h *updateHeap) init() {
+	h.chWakeUp = make(chan struct{}, 1)
+}
+
+func (h *updateHeap) addSession(s *UDPSession) {
+	h.mu.Lock()
+	heap.Push(h, entry{time.Now(), s})
+	h.mu.Unlock()
+	h.wakeup()
+}
+
+func (h *updateHeap) removeSession(s *UDPSession) {
+	h.mu.Lock()
+	if s.updaterIdx != -1 {
+		heap.Remove(h, s.updaterIdx)
+	}
+	h.mu.Unlock()
+}
+
+func (h *updateHeap) wakeup() {
+	select {
+	case h.chWakeUp <- struct{}{}:
+	default:
+	}
+}
+
+func (h *updateHeap) updateTask() {
+	var timer <-chan time.Time
+	for {
+		select {
+		case <-timer:
+		case <-h.chWakeUp:
+		}
+
+		h.mu.Lock()
+		hlen := h.Len()
+		now := time.Now()
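+		// Pop every session whose flush deadline has passed, flush it via
+		// update(), and push it back with its next deadline; the first
+		// future-dated entry ends the sweep (min-heap order).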
+		for i := 0; i < hlen; i++ {
+			entry := heap.Pop(h).(entry)
+			if now.After(entry.ts) {
+				entry.ts = now.Add(entry.s.update())
+				heap.Push(h, entry)
+			} else {
+				heap.Push(h, entry)
+				break
+			}
+		}
+
+		if hlen > 0 {
+			timer = time.After(h.entries[0].ts.Sub(now))
+		}
+		h.mu.Unlock()
+	}
+}

+ 110 - 0
vendor/github.com/fatedier/kcp-go/xor.go

@@ -0,0 +1,110 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package kcp
+
+import (
+	"runtime"
+	"unsafe"
+)
+
+const wordSize = int(unsafe.Sizeof(uintptr(0)))
+const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
+
+// fastXORBytes xors in bulk. It only works on architectures that
+// support unaligned reads/writes.
+func fastXORBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n {
+		n = len(b)
+	}
+
+	w := n / wordSize
+	if w > 0 {
+		wordBytes := w * wordSize
+		fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
+	}
+
+	for i := (n - n%wordSize); i < n; i++ {
+		dst[i] = a[i] ^ b[i]
+	}
+
+	return n
+}
+
+func safeXORBytes(dst, a, b []byte) int {
+	n := len(a)
+	if len(b) < n {
+		n = len(b)
+	}
+	ex := n % 8
+	for i := 0; i < ex; i++ {
+		dst[i] = a[i] ^ b[i]
+	}
+
+	for i := ex; i < n; i += 8 {
+		_dst := dst[i : i+8]
+		_a := a[i : i+8]
+		_b := b[i : i+8]
+		_dst[0] = _a[0] ^ _b[0]
+		_dst[1] = _a[1] ^ _b[1]
+		_dst[2] = _a[2] ^ _b[2]
+		_dst[3] = _a[3] ^ _b[3]
+
+		_dst[4] = _a[4] ^ _b[4]
+		_dst[5] = _a[5] ^ _b[5]
+		_dst[6] = _a[6] ^ _b[6]
+		_dst[7] = _a[7] ^ _b[7]
+	}
+	return n
+}
+
+// xorBytes xors the bytes in a and b. The destination is assumed to have enough
+// space. Returns the number of bytes xor'd.
+func xorBytes(dst, a, b []byte) int {
+	if supportsUnaligned {
+		return fastXORBytes(dst, a, b)
+	}
+	// TODO(hanwen): if (dst, a, b) have common alignment
+	// we could still try fastXORBytes. It is not clear
+	// how often this happens, and it's only worth it if
+	// the block encryption itself is hardware
+	// accelerated.
+	return safeXORBytes(dst, a, b)
+}
+
+// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture).
+// The arguments are assumed to be of equal length.
+func fastXORWords(dst, a, b []byte) {
+	dw := *(*[]uintptr)(unsafe.Pointer(&dst))
+	aw := *(*[]uintptr)(unsafe.Pointer(&a))
+	bw := *(*[]uintptr)(unsafe.Pointer(&b))
+	n := len(b) / wordSize
+	ex := n % 8
+	for i := 0; i < ex; i++ {
+		dw[i] = aw[i] ^ bw[i]
+	}
+
+	for i := ex; i < n; i += 8 {
+		_dw := dw[i : i+8]
+		_aw := aw[i : i+8]
+		_bw := bw[i : i+8]
+		_dw[0] = _aw[0] ^ _bw[0]
+		_dw[1] = _aw[1] ^ _bw[1]
+		_dw[2] = _aw[2] ^ _bw[2]
+		_dw[3] = _aw[3] ^ _bw[3]
+		_dw[4] = _aw[4] ^ _bw[4]
+		_dw[5] = _aw[5] ^ _bw[5]
+		_dw[6] = _aw[6] ^ _bw[6]
+		_dw[7] = _aw[7] ^ _bw[7]
+	}
+}
+
+func xorWords(dst, a, b []byte) {
+	if supportsUnaligned {
+		fastXORWords(dst, a, b)
+	} else {
+		safeXORBytes(dst, a, b)
+	}
+}

+ 14 - 0
vendor/github.com/templexxx/cpufeat/.gitignore

@@ -0,0 +1,14 @@
+# Binaries for programs and plugins
+*.exe
+*.dll
+*.so
+*.dylib
+
+# Test binary, build with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
+.glide/

+ 27 - 0
vendor/github.com/templexxx/cpufeat/LICENSE

@@ -0,0 +1,27 @@
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 32 - 0
vendor/github.com/templexxx/cpufeat/cpu.go

@@ -0,0 +1,32 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cpufeat implements processor feature detection
+// used by the Go standard library.
+package cpufeat
+
+var X86 x86
+
+// The booleans in x86 contain the correspondingly named cpuid feature bit.
+// HasAVX and HasAVX2 are only set if the OS does support XMM and YMM registers
+// in addition to the cpuid feature bit being set.
+// The struct is padded to avoid false sharing.
+type x86 struct {
+	_            [CacheLineSize]byte
+	HasAES       bool
+	HasAVX       bool
+	HasAVX2      bool
+	HasBMI1      bool
+	HasBMI2      bool
+	HasERMS      bool
+	HasOSXSAVE   bool
+	HasPCLMULQDQ bool
+	HasPOPCNT    bool
+	HasSSE2      bool
+	HasSSE3      bool
+	HasSSSE3     bool
+	HasSSE41     bool
+	HasSSE42     bool
+	_            [CacheLineSize]byte
+}
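+
+// Callers typically branch on these flags to pick an implementation, e.g.
+//
+//	if cpufeat.X86.HasAVX2 {
+//		// AVX2 path
+//	} else if cpufeat.X86.HasSSSE3 {
+//		// SSSE3 path
+//	}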

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_arm.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 32

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_arm64.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 32

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_mips.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 32

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_mips64.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 32

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_mips64le.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 32

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_mipsle.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 32

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_ppc64.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 128

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_ppc64le.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 128

+ 7 - 0
vendor/github.com/templexxx/cpufeat/cpu_s390x.go

@@ -0,0 +1,7 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpufeat
+
+const CacheLineSize = 256

+ 59 - 0
vendor/github.com/templexxx/cpufeat/cpu_x86.go

@@ -0,0 +1,59 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 amd64 amd64p32
+
+package cpufeat
+
+const CacheLineSize = 64
+
+// cpuid is implemented in cpu_x86.s.
+func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
+
+// xgetbv with ecx = 0 is implemented in cpu_x86.s.
+func xgetbv() (eax, edx uint32)
+
+func init() {
+	maxId, _, _, _ := cpuid(0, 0)
+
+	if maxId < 1 {
+		return
+	}
+
+	_, _, ecx1, edx1 := cpuid(1, 0)
+	X86.HasSSE2 = isSet(26, edx1)
+
+	X86.HasSSE3 = isSet(0, ecx1)
+	X86.HasPCLMULQDQ = isSet(1, ecx1)
+	X86.HasSSSE3 = isSet(9, ecx1)
+	X86.HasSSE41 = isSet(19, ecx1)
+	X86.HasSSE42 = isSet(20, ecx1)
+	X86.HasPOPCNT = isSet(23, ecx1)
+	X86.HasAES = isSet(25, ecx1)
+	X86.HasOSXSAVE = isSet(27, ecx1)
+
+	osSupportsAVX := false
+	// For XGETBV, OSXSAVE bit is required and sufficient.
+	if X86.HasOSXSAVE {
+		eax, _ := xgetbv()
+		// Check if XMM and YMM registers have OS support.
+		osSupportsAVX = isSet(1, eax) && isSet(2, eax)
+	}
+
+	X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
+
+	if maxId < 7 {
+		return
+	}
+
+	_, ebx7, _, _ := cpuid(7, 0)
+	X86.HasBMI1 = isSet(3, ebx7)
+	X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
+	X86.HasBMI2 = isSet(8, ebx7)
+	X86.HasERMS = isSet(9, ebx7)
+}
+
+func isSet(bitpos uint, value uint32) bool {
+	return value&(1<<bitpos) != 0
+}

+ 32 - 0
vendor/github.com/templexxx/cpufeat/cpu_x86.s

@@ -0,0 +1,32 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 amd64 amd64p32
+
+#include "textflag.h"
+
+// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·cpuid(SB), NOSPLIT, $0-24
+	MOVL eaxArg+0(FP), AX
+	MOVL ecxArg+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func xgetbv() (eax, edx uint32)
+TEXT ·xgetbv(SB),NOSPLIT,$0-8
+#ifdef GOOS_nacl
+	// nacl does not support XGETBV.
+	MOVL $0, eax+0(FP)
+	MOVL $0, edx+4(FP)
+#else
+	MOVL $0, CX
+	WORD $0x010f; BYTE $0xd0 //XGETBV
+	MOVL AX, eax+0(FP)
+	MOVL DX, edx+4(FP)
+#endif
+	RET

+ 40 - 0
vendor/github.com/templexxx/reedsolomon/.gitignore

@@ -0,0 +1,40 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
+/.idea
+/backup
+/loopunroll/
+cpu.out
+mathtool/galois/
+mathtool/matrix/
+mem.out
+/examples/
+/.DS_Store
+/mathtool/cntinverse
+/invert
+/bakcup
+/buf.svg
+*.svg
+*.out
+/escape

+ 9 - 0
vendor/github.com/templexxx/reedsolomon/.travis.yml

@@ -0,0 +1,9 @@
+language: go
+go:
+    - 1.9
+
+install:
+    - go get github.com/templexxx/reedsolomon
+
+script:
+    - go test -v

+ 23 - 0
vendor/github.com/templexxx/reedsolomon/LICENSE

@@ -0,0 +1,23 @@
+MIT License
+
+Copyright (c) 2017 Templexxx
+Copyright (c) 2015 Klaus Post
+Copyright (c) 2015 Backblaze
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 109 - 0
vendor/github.com/templexxx/reedsolomon/README.md

@@ -0,0 +1,109 @@
+# Reed-Solomon
+
+[![GoDoc][1]][2] [![MIT licensed][3]][4] [![Build Status][5]][6] [![Go Report Card][7]][8] 
+
+[1]: https://godoc.org/github.com/templexxx/reedsolomon?status.svg
+[2]: https://godoc.org/github.com/templexxx/reedsolomon
+[3]: https://img.shields.io/badge/license-MIT-blue.svg
+[4]: LICENSE
+[5]: https://travis-ci.org/templexxx/reedsolomon.svg?branch=master
+[6]: https://travis-ci.org/templexxx/reedsolomon
+[7]: https://goreportcard.com/badge/github.com/templexxx/reedsolomon
+[8]: https://goreportcard.com/report/github.com/templexxx/reedsolomon
+
+
+## Introduction
+1.  Reed-Solomon Erasure Code engine in pure Go.
+2.  Super Fast: more than 10GB/s per physical core (10+4, 4KB per vector, MacBook Pro 2.8 GHz Intel Core i7)
+
+## Installation
+To get the package use the standard:
+```bash
+go get github.com/templexxx/reedsolomon
+```
+
+## Documentation
+See the associated [GoDoc](http://godoc.org/github.com/templexxx/reedsolomon)
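+
+A minimal usage sketch, built against the `Encoder` API in this copy (`New`, `Encode`, `Reconstruct`; a lost vector is marked with `nil`):
+
+```go
+package main
+
+import (
+	"crypto/rand"
+	"fmt"
+
+	"github.com/templexxx/reedsolomon"
+)
+
+func main() {
+	d, p := 10, 4
+	enc, err := reedsolomon.New(d, p) // Vandermonde encoding matrix
+	if err != nil {
+		panic(err)
+	}
+	// d+p vects of equal size: data first, then parity.
+	vects := make([][]byte, d+p)
+	for i := range vects {
+		vects[i] = make([]byte, 4096)
+	}
+	for i := 0; i < d; i++ {
+		if _, err := rand.Read(vects[i]); err != nil {
+			panic(err)
+		}
+	}
+	if err := enc.Encode(vects); err != nil { // fill the parity vects
+		panic(err)
+	}
+	vects[0] = nil // pretend the first data vect is lost
+	if err := enc.Reconstruct(vects); err != nil { // bring it back
+		panic(err)
+	}
+	fmt.Println("recovered", len(vects[0]), "bytes")
+}
+```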
+
+## Specification
+### GOARCH
+1. All architectures are supported
+2. 0.1.0 needs Go 1.9 (for sync.Map) on AMD64
+
+### Math
+1. Coding over GF(2^8)
+2. Primitive Polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x1d)
+3. mathtool/gentbls.go : generates the primitive polynomial with its log, exp, multiplication and inverse tables; a good place to learn how the Galois field works
+4. mathtool/cntinverse.go : calculates how many inverse matrices a given RS code configuration can have
+5. Both Cauchy and Vandermonde matrices are supported. Vandermonde needs extra operations to preserve the property that any square subset of rows is invertible
+
+### Why so fast?
+These three parts cost the most time:
+
+1. looking up Galois-field tables
+2. reading/writing memory
+3. calculating the inverse matrix in the reconstruct process
+
+SIMD solves no.1.
+
+Cache-friendly code helps with no.2 & no.3; on top of that, a sync.Map caches inverse matrices, which saves about 1000ns whenever the same matrix is needed again.
+
+## Performance
+
+Performance depends mainly on:
+
+1. CPU instruction extension (AVX2, SSSE3 or none)
+2. number of data/parity vects
+3. unit size of calculation (see rs_amd64.go)
+4. size of shards
+5. speed of memory (much of the time goes to reading/writing memory)
+6. performance of the CPU
+7. the way it is used (reuse memory)
+
+Keep in mind that benchmarks are quite different from encoding/decoding in practice:
+in benchmark loops the CPU cache helps a lot, so in practice memory must be reused to bring performance close to the benchmark numbers.
+
+Example performance on my 2017 MacBook (i7 2.8GHz), 10+4, with 0.1.0.
+
+### Encoding:
+
+| Vector size | Speed (MB/s) |
+|----------------|--------------|
+| 1400B              |    7655.02  |
+| 4KB              |       10551.37  |
+| 64KB              |       9297.25 |
+| 1MB              |      6829.89 |
+| 16MB              |      6312.83 |
+
+### Reconstruct (use nil to mark which vector needs repair):
+
+| Vector size | Speed (MB/s) |
+|----------------|--------------|
+| 1400B              |    4124.85  |
+| 4KB              |       5715.45 |
+| 64KB              |       6050.06 |
+| 1MB              |      5001.21 |
+| 16MB              |      5043.04 |
+
+### ReconstructWithPos (use a position list to mark which vectors need repair, reusing memory):
+
+| Vector size | Speed (MB/s) |
+|----------------|--------------|
+| 1400B              |    6170.24  |
+| 4KB              |       9444.86 |
+| 64KB              |       9311.30 |
+| 1MB              |      6781.06 |
+| 16MB              |      6285.34 |
+
+**The reconstruct benchmarks above run with the inverse-matrix cache; without it, each run costs about 1000ns more.**
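+
+For example, a sketch of the positioned reconstruct with the 3+2 layout from the `Encoder` docs (vects[0] lost, survivors at 1, 2 and 3):
+
+```go
+// "has" must list exactly d surviving positions, in increasing order;
+// pLost may be nil when no parity vect was lost.
+err := enc.ReconstWithPos(vects, []int{1, 2, 3}, []int{0}, nil)
+```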
+
+## Who is using this?
+
+1. https://github.com/xtaci/kcp-go -- A Production-Grade Reliable-UDP Library for golang
+
+## Links & Thanks
+* [Klauspost ReedSolomon](https://github.com/klauspost/reedsolomon)
+* [intel ISA-L](https://github.com/01org/isa-l)
+* [GF SIMD](http://www.ssrc.ucsc.edu/papers/plank-fast13.pdf)
+* [asm2plan9s](https://github.com/fwessels/asm2plan9s)

+ 156 - 0
vendor/github.com/templexxx/reedsolomon/matrix.go

@@ -0,0 +1,156 @@
+package reedsolomon
+
+import "errors"
+
+type matrix []byte
+
+func genEncMatrixCauchy(d, p int) matrix {
+	t := d + p
+	m := make([]byte, t*d)
+	for i := 0; i < d; i++ {
+		m[i*d+i] = byte(1)
+	}
+
+	d2 := d * d
+	for i := d; i < t; i++ {
+		for j := 0; j < d; j++ {
+			d := i ^ j
+			a := inverseTbl[d]
+			m[d2] = byte(a)
+			d2++
+		}
+	}
+	return m
+}
+
+func gfExp(b byte, n int) byte {
+	if n == 0 {
+		return 1
+	}
+	if b == 0 {
+		return 0
+	}
+	a := logTbl[b]
+	ret := int(a) * n
+	for ret >= 255 {
+		ret -= 255
+	}
+	return byte(expTbl[ret])
+}
+
+func genVandMatrix(vm []byte, t, d int) {
+	for i := 0; i < t; i++ {
+		for j := 0; j < d; j++ {
+			vm[i*d+j] = gfExp(byte(i), j)
+		}
+	}
+}
+
+func (m matrix) mul(right matrix, rows, cols int, r []byte) {
+	for i := 0; i < rows; i++ {
+		for j := 0; j < cols; j++ {
+			var v byte
+			for k := 0; k < cols; k++ {
+				v ^= gfMul(m[i*cols+k], right[k*cols+j])
+			}
+			r[i*cols+j] = v
+		}
+	}
+}
+
+func genEncMatrixVand(d, p int) (matrix, error) {
+	t := d + p
+	buf := make([]byte, (2*t+4*d)*d)
+	vm := buf[:t*d]
+	genVandMatrix(vm, t, d)
+	top := buf[t*d : (t+d)*d]
+	copy(top, vm[:d*d])
+	raw := buf[(t+d)*d : (t+3*d)*d]
+	im := buf[(t+3*d)*d : (t+4*d)*d]
+	err := matrix(top).invert(raw, d, im)
+	if err != nil {
+		return nil, err
+	}
+	r := buf[(t+4*d)*d : (2*t+4*d)*d]
+	matrix(vm).mul(im, t, d, r)
+	return matrix(r), nil
+}
+
+// [I|m'] -> [m']
+func (m matrix) subMatrix(n int, r []byte) {
+	for i := 0; i < n; i++ {
+		off := i * n
+		copy(r[off:off+n], m[2*off+n:2*(off+n)])
+	}
+}
+
+func (m matrix) invert(raw matrix, n int, im []byte) error {
+	// [m] -> [m|I]
+	for i := 0; i < n; i++ {
+		t := i * n
+		copy(raw[2*t:2*t+n], m[t:t+n])
+		raw[2*t+i+n] = byte(1)
+	}
+	err := gauss(raw, n)
+	if err != nil {
+		return err
+	}
+	raw.subMatrix(n, im)
+	return nil
+}
+
+func (m matrix) swap(i, j, n int) {
+	for k := 0; k < n; k++ {
+		m[i*n+k], m[j*n+k] = m[j*n+k], m[i*n+k]
+	}
+}
+
+func gfMul(a, b byte) byte {
+	return mulTbl[a][b]
+}
+
+var errSingular = errors.New("rs.invert: matrix is singular")
+
+// [m|I] -> [I|m']
+func gauss(m matrix, n int) error {
+	n2 := 2 * n
+	for i := 0; i < n; i++ {
+		if m[i*n2+i] == 0 {
+			for j := i + 1; j < n; j++ {
+				if m[j*n2+i] != 0 {
+					m.swap(i, j, n2)
+					break
+				}
+			}
+		}
+		if m[i*n2+i] == 0 {
+			return errSingular
+		}
+		if m[i*n2+i] != 1 {
+			d := m[i*n2+i]
+			scale := inverseTbl[d]
+			for c := 0; c < n2; c++ {
+				m[i*n2+c] = gfMul(m[i*n2+c], scale)
+			}
+		}
+		for j := i + 1; j < n; j++ {
+			if m[j*n2+i] != 0 {
+				scale := m[j*n2+i]
+				for c := 0; c < n2; c++ {
+					m[j*n2+c] ^= gfMul(scale, m[i*n2+c])
+				}
+			}
+		}
+	}
+	for k := 0; k < n; k++ {
+		for j := 0; j < k; j++ {
+			if m[j*n2+k] != 0 {
+				scale := m[j*n2+k]
+				for c := 0; c < n2; c++ {
+					m[j*n2+c] ^= gfMul(scale, m[k*n2+c])
+				}
+			}
+		}
+	}
+	return nil
+}

+ 280 - 0
vendor/github.com/templexxx/reedsolomon/rs.go

@@ -0,0 +1,280 @@
+/*
+	Reed-Solomon Codes over GF(2^8)
+	Primitive Polynomial:  x^8+x^4+x^3+x^2+1
+	Galois Field arithmetic using Intel SIMD instructions (AVX2 or SSSE3)
+*/
+
+package reedsolomon
+
+import "errors"
+
+// Encoder implements Reed-Solomon encoding/reconstructing
+type Encoder interface {
+	// Encode multiplies the generator matrix with the data;
+	// len(vects) must equal the number of data+parity vects
+	Encode(vects [][]byte) error
+	// The result of a reconstruct is put back into the original position in vects:
+	// if vects[0] was lost, after the reconstruct its data is back in vects[0]
+
+	// Reconstruct repairs lost data & parity
+	// Set a vect to nil if it is lost
+	Reconstruct(vects [][]byte) error
+	// ReconstructData repairs lost data only
+	// Set a vect to nil if it is lost
+	ReconstructData(vects [][]byte) error
+	// ReconstWithPos repairs lost data & parity given the surviving and lost vect positions.
+	// It saves bandwidth & disk I/O compared with Reconstruct when fewer vects than
+	// the number of parity are lost.
+	// As with any erasure code, we must know which vect is broken,
+	// so it's necessary to provide such APIs.
+	// len(has) must equal the number of data vects.
+	// Example:
+	// in 3+2, the full position list is [0,1,2,3,4];
+	// if vects[0] is lost, "has" could be [1,2,3] or [1,2,4] or ...,
+	// and you must be sure the vects named in "has" hold correct data
+	// (vects[1], vects[2], vects[3] when "has" is [1,2,3]);
+	// "dLost" will then be [0]
+	// ps:
+	// 1. the lists above are in increasing order  TODO support out-of-order
+	// 2. every vect must be allocated with the same length (don't set any nil),
+	// so no new slices need to be made
+	ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error
+	// ReconstDataWithPos repairs lost data given the surviving and lost vect positions.
+	// There is no need to append lost parity positions to "dLost"
+	ReconstDataWithPos(vects [][]byte, has, dLost []int) error
+}
+
+func checkCfg(d, p int) error {
+	if (d <= 0) || (p <= 0) {
+		return errors.New("rs.New: data or parity <= 0")
+	}
+	if d+p >= 256 {
+		return errors.New("rs.New: data+parity >= 256")
+	}
+	return nil
+}
+
+// New creates an Encoder (Vandermonde matrix as the encoding matrix)
+func New(data, parity int) (enc Encoder, err error) {
+	err = checkCfg(data, parity)
+	if err != nil {
+		return
+	}
+	e, err := genEncMatrixVand(data, parity)
+	if err != nil {
+		return
+	}
+	return newRS(data, parity, e), nil
+}
+
+// NewCauchy creates an Encoder (Cauchy matrix as the generator matrix)
+func NewCauchy(data, parity int) (enc Encoder, err error) {
+	err = checkCfg(data, parity)
+	if err != nil {
+		return
+	}
+	e := genEncMatrixCauchy(data, parity)
+	return newRS(data, parity, e), nil
+}
+
+type encBase struct {
+	data   int
+	parity int
+	encode []byte
+	gen    []byte
+}
+
+func checkEnc(d, p int, vs [][]byte) (size int, err error) {
+	total := len(vs)
+	if d+p != total {
+		err = errors.New("rs.checkER: vects not match rs args")
+		return
+	}
+	size = len(vs[0])
+	if size == 0 {
+		err = errors.New("rs.checkER: vects size = 0")
+		return
+	}
+	for i := 1; i < total; i++ {
+		if len(vs[i]) != size {
+			err = errors.New("rs.checkER: vects size mismatch")
+			return
+		}
+	}
+	return
+}
+
+func (e *encBase) Encode(vects [][]byte) (err error) {
+	d := e.data
+	p := e.parity
+	_, err = checkEnc(d, p, vects)
+	if err != nil {
+		return
+	}
+	dv := vects[:d]
+	pv := vects[d:]
+	g := e.gen
+	for i := 0; i < d; i++ {
+		for j := 0; j < p; j++ {
+			if i != 0 {
+				mulVectAdd(g[j*d+i], dv[i], pv[j])
+			} else {
+				mulVect(g[j*d], dv[0], pv[j])
+			}
+		}
+	}
+	return
+}
+
+func mulVect(c byte, a, b []byte) {
+	t := mulTbl[c]
+	for i := 0; i < len(a); i++ {
+		b[i] = t[a[i]]
+	}
+}
+
+func mulVectAdd(c byte, a, b []byte) {
+	t := mulTbl[c]
+	for i := 0; i < len(a); i++ {
+		b[i] ^= t[a[i]]
+	}
+}
+
+func (e *encBase) Reconstruct(vects [][]byte) (err error) {
+	return e.reconstruct(vects, false)
+}
+
+func (e *encBase) ReconstructData(vects [][]byte) (err error) {
+	return e.reconstruct(vects, true)
+}
+
+func (e *encBase) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
+	return e.reconstWithPos(vects, has, dLost, pLost, false)
+}
+
+func (e *encBase) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
+	return e.reconstWithPos(vects, has, dLost, nil, true)
+}
+
+func (e *encBase) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
+	d := e.data
+	em := e.encode
+	dCnt := len(dLost)
+	size := len(vects[has[0]])
+	if dCnt != 0 {
+		vtmp := make([][]byte, d+dCnt)
+		for i, p := range has {
+			vtmp[i] = vects[p]
+		}
+		for i, p := range dLost {
+			if len(vects[p]) == 0 {
+				vects[p] = make([]byte, size)
+			}
+			vtmp[i+d] = vects[p]
+		}
+		matrixbuf := make([]byte, 4*d*d+dCnt*d)
+		m := matrixbuf[:d*d]
+		for i, l := range has {
+			copy(m[i*d:i*d+d], em[l*d:l*d+d])
+		}
+		raw := matrixbuf[d*d : 3*d*d]
+		im := matrixbuf[3*d*d : 4*d*d]
+		err2 := matrix(m).invert(raw, d, im)
+		if err2 != nil {
+			return err2
+		}
+		g := matrixbuf[4*d*d:]
+		for i, l := range dLost {
+			copy(g[i*d:i*d+d], im[l*d:l*d+d])
+		}
+		etmp := &encBase{data: d, parity: dCnt, gen: g}
+		err2 = etmp.Encode(vtmp[:d+dCnt])
+		if err2 != nil {
+			return err2
+		}
+	}
+	if dataOnly {
+		return
+	}
+	pCnt := len(pLost)
+	if pCnt != 0 {
+		vtmp := make([][]byte, d+pCnt)
+		g := make([]byte, pCnt*d)
+		for i, l := range pLost {
+			copy(g[i*d:i*d+d], em[l*d:l*d+d])
+		}
+		for i := 0; i < d; i++ {
+			vtmp[i] = vects[i]
+		}
+		for i, p := range pLost {
+			if len(vects[p]) == 0 {
+				vects[p] = make([]byte, size)
+			}
+			vtmp[i+d] = vects[p]
+		}
+		etmp := &encBase{data: d, parity: pCnt, gen: g}
+		err2 := etmp.Encode(vtmp[:d+pCnt])
+		if err2 != nil {
+			return err2
+		}
+	}
+	return
+}
+
+func (e *encBase) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
+	d := e.data
+	p := e.parity
+	// TODO check more, maybe element in has show in lost & deal with len(has) > d
+	if len(has) != d {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	dCnt := len(dLost)
+	if dCnt > p {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	pCnt := len(pLost)
+	if pCnt > p {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	return e.reconst(vects, has, dLost, pLost, dataOnly)
+}
+
+func (e *encBase) reconstruct(vects [][]byte, dataOnly bool) (err error) {
+	d := e.data
+	p := e.parity
+	t := d + p
+	listBuf := make([]int, t+p)
+	has := listBuf[:d]
+	dLost := listBuf[d:t]
+	pLost := listBuf[t : t+p]
+	hasCnt, dCnt, pCnt := 0, 0, 0
+	for i := 0; i < t; i++ {
+		if vects[i] != nil {
+			if hasCnt < d {
+				has[hasCnt] = i
+				hasCnt++
+			}
+		} else {
+			if i < d {
+				if dCnt < p {
+					dLost[dCnt] = i
+					dCnt++
+				} else {
+					return errors.New("rs.Reconst: not enough vects")
+				}
+			} else {
+				if pCnt < p {
+					pLost[pCnt] = i
+					pCnt++
+				} else {
+					return errors.New("rs.Reconst: not enough vects")
+				}
+			}
+		}
+	}
+	if hasCnt != d {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	dLost = dLost[:dCnt]
+	pLost = pLost[:pCnt]
+	return e.reconst(vects, has, dLost, pLost, dataOnly)
+}

+ 868 - 0
vendor/github.com/templexxx/reedsolomon/rs_amd64.go

@@ -0,0 +1,868 @@
+package reedsolomon
+
+import (
+	"errors"
+	"sync"
+
+	"github.com/templexxx/cpufeat"
+)
+
+// SIMD Instruction Extensions
+const (
+	none = iota
+	avx2
+	ssse3
+)
+
+var extension = none
+
+func init() {
+	getEXT()
+}
+
+func getEXT() {
+	if cpufeat.X86.HasAVX2 {
+		extension = avx2
+		return
+	}
+	if cpufeat.X86.HasSSSE3 {
+		extension = ssse3
+		return
+	}
+	extension = none
+}
+
+//go:noescape
+func copy32B(dst, src []byte) // Needs SSE2 (introduced in 2001)
+
+func initTbl(g matrix, rows, cols int, tbl []byte) {
+	off := 0
+	for i := 0; i < cols; i++ {
+		for j := 0; j < rows; j++ {
+			c := g[j*cols+i]
+			t := lowhighTbl[c][:]
+			copy32B(tbl[off:off+32], t)
+			off += 32
+		}
+	}
+}
+
+// At most 3060 inverse matrices (when data=14, parity=4; calculated by mathtool/cntinverse).
+// In practice, data is usually below 12 and parity below 5.
+func okCache(data, parity int) bool {
+	if data < 15 && parity < 5 { // tunable, but data+parity can't exceed 32: the inverse-matrix cache key is a uint32 bitmap of positions
+		return true
+	}
+	return false
+}
+
+type (
+	encSSSE3 encSIMD
+	encAVX2  encSIMD
+	encSIMD  struct {
+		data   int
+		parity int
+		encode matrix
+		gen    matrix
+		tbl    []byte
+		// the inverse-matrix cache is designed for small vect sizes (< 4KB):
+		// it saves the time spent calculating the inverse matrix,
+		// which matters less for big vect sizes
+		enableCache  bool
+		inverseCache iCache
+	}
+	iCache struct {
+		sync.RWMutex
+		data map[uint32][]byte
+	}
+)
+
+func newRS(d, p int, em matrix) (enc Encoder) {
+	g := em[d*d:]
+	if extension == none {
+		return &encBase{data: d, parity: p, encode: em, gen: g}
+	}
+	t := make([]byte, d*p*32)
+	initTbl(g, p, d, t)
+	ok := okCache(d, p)
+	if extension == avx2 {
+		e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
+			inverseCache: iCache{data: make(map[uint32][]byte)}}
+		return e
+	}
+	e := &encSSSE3{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
+		inverseCache: iCache{data: make(map[uint32][]byte)}}
+	return e
+}
+
+// Size of the sub-vector processed per pass
+const unit int = 16 * 1024
+
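+// getDo returns how many bytes each pass handles: inputs below unit are
+// rounded down to a multiple of 16 bytes (one XMM register); inputs smaller
+// than 16 bytes return unit so the caller falls into the remainder path.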
+func getDo(n int) int {
+	if n < unit {
+		c := n >> 4
+		if c == 0 {
+			return unit
+		}
+		return c << 4
+	}
+	return unit
+}
+
+func (e *encAVX2) Encode(vects [][]byte) (err error) {
+	d := e.data
+	p := e.parity
+	size, err := checkEnc(d, p, vects)
+	if err != nil {
+		return
+	}
+	dv := vects[:d]
+	pv := vects[d:]
+	start, end := 0, 0
+	do := getDo(size)
+	for start < size {
+		end = start + do
+		if end <= size {
+			e.matrixMul(start, end, dv, pv)
+			start = end
+		} else {
+			e.matrixMulRemain(start, size, dv, pv)
+			start = size
+		}
+	}
+	return
+}
+
+//go:noescape
+func mulVectAVX2(tbl, d, p []byte)
+
+//go:noescape
+func mulVectAddAVX2(tbl, d, p []byte)
+
+func (e *encAVX2) matrixMul(start, end int, dv, pv [][]byte) {
+	d := e.data
+	p := e.parity
+	tbl := e.tbl
+	off := 0
+	for i := 0; i < d; i++ {
+		for j := 0; j < p; j++ {
+			t := tbl[off : off+32]
+			if i != 0 {
+				mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
+			} else {
+				mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
+			}
+			off += 32
+		}
+	}
+}
+
+func (e *encAVX2) matrixMulRemain(start, end int, dv, pv [][]byte) {
+	undone := end - start
+	do := (undone >> 4) << 4
+	d := e.data
+	p := e.parity
+	tbl := e.tbl
+	if do >= 16 {
+		end2 := start + do
+		off := 0
+		for i := 0; i < d; i++ {
+			for j := 0; j < p; j++ {
+				t := tbl[off : off+32]
+				if i != 0 {
+					mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
+				} else {
+					mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
+				}
+				off += 32
+			}
+		}
+		start = end
+	}
+	if undone > do {
+			// may recompute a few bytes, but still improves performance a lot
+		start2 := end - 16
+		if start2 >= 0 {
+			off := 0
+			for i := 0; i < d; i++ {
+				for j := 0; j < p; j++ {
+					t := tbl[off : off+32]
+					if i != 0 {
+						mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
+					} else {
+						mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
+					}
+					off += 32
+				}
+			}
+		} else {
+			g := e.gen
+			for i := 0; i < d; i++ {
+				for j := 0; j < p; j++ {
+					if i != 0 {
+						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
+					} else {
+						mulVect(g[j*d], dv[0][start:], pv[j][start:])
+					}
+				}
+			}
+		}
+	}
+}
+
+// encodeGen uses the generator matrix instead of the precomputed tbls for encoding.
+// It is designed for reconstructing:
+// for small vects, initTbl costs too much time, so it is skipped;
+// for big vects, the tbls barely matter because the cache gets filled with the vects' data.
+func (e *encAVX2) encodeGen(vects [][]byte) (err error) {
+	d := e.data
+	p := e.parity
+	size, err := checkEnc(d, p, vects)
+	if err != nil {
+		return
+	}
+	dv := vects[:d]
+	pv := vects[d:]
+	start, end := 0, 0
+	do := getDo(size)
+	for start < size {
+		end = start + do
+		if end <= size {
+			e.matrixMulGen(start, end, dv, pv)
+			start = end
+		} else {
+			e.matrixMulRemainGen(start, size, dv, pv)
+			start = size
+		}
+	}
+	return
+}
+
+func (e *encAVX2) matrixMulGen(start, end int, dv, pv [][]byte) {
+	d := e.data
+	p := e.parity
+	g := e.gen
+	for i := 0; i < d; i++ {
+		for j := 0; j < p; j++ {
+			t := lowhighTbl[g[j*d+i]][:]
+			if i != 0 {
+				mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
+			} else {
+				mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
+			}
+		}
+	}
+}
+
+func (e *encAVX2) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
+	undone := end - start
+	do := (undone >> 4) << 4
+	d := e.data
+	p := e.parity
+	g := e.gen
+	if do >= 16 {
+		end2 := start + do
+		for i := 0; i < d; i++ {
+			for j := 0; j < p; j++ {
+				t := lowhighTbl[g[j*d+i]][:]
+				if i != 0 {
+					mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
+				} else {
+					mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
+				}
+			}
+		}
+		start = end
+	}
+	if undone > do {
+		start2 := end - 16
+		if start2 >= 0 {
+			for i := 0; i < d; i++ {
+				for j := 0; j < p; j++ {
+					t := lowhighTbl[g[j*d+i]][:]
+					if i != 0 {
+						mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
+					} else {
+						mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
+					}
+				}
+			}
+		} else {
+			for i := 0; i < d; i++ {
+				for j := 0; j < p; j++ {
+					if i != 0 {
+						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
+					} else {
+						mulVect(g[j*d], dv[0][start:], pv[j][start:])
+					}
+				}
+			}
+		}
+	}
+}
+
+func (e *encAVX2) Reconstruct(vects [][]byte) (err error) {
+	return e.reconstruct(vects, false)
+}
+
+func (e *encAVX2) ReconstructData(vects [][]byte) (err error) {
+	return e.reconstruct(vects, true)
+}
+
+func (e *encAVX2) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
+	return e.reconstWithPos(vects, has, dLost, pLost, false)
+}
+
+func (e *encAVX2) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
+	return e.reconstWithPos(vects, has, dLost, nil, true)
+}
+
+func (e *encAVX2) makeGen(has, dLost []int) (gen []byte, err error) {
+	d := e.data
+	em := e.encode
+	cnt := len(dLost)
+	if !e.enableCache {
+		matrixbuf := make([]byte, 4*d*d+cnt*d)
+		m := matrixbuf[:d*d]
+		for i, l := range has {
+			copy(m[i*d:i*d+d], em[l*d:l*d+d])
+		}
+		raw := matrixbuf[d*d : 3*d*d]
+		im := matrixbuf[3*d*d : 4*d*d]
+		err2 := matrix(m).invert(raw, d, im)
+		if err2 != nil {
+			return nil, err2
+		}
+		g := matrixbuf[4*d*d:]
+		for i, l := range dLost {
+			copy(g[i*d:i*d+d], im[l*d:l*d+d])
+		}
+		return g, nil
+	}
+	var ikey uint32
+	for _, p := range has {
+		ikey += 1 << uint8(p)
+	}
+	e.inverseCache.RLock()
+	v, ok := e.inverseCache.data[ikey]
+	if ok {
+		im := v
+		g := make([]byte, cnt*d)
+		for i, l := range dLost {
+			copy(g[i*d:i*d+d], im[l*d:l*d+d])
+		}
+		e.inverseCache.RUnlock()
+		return g, nil
+	}
+	e.inverseCache.RUnlock()
+	matrixbuf := make([]byte, 4*d*d+cnt*d)
+	m := matrixbuf[:d*d]
+	for i, l := range has {
+		copy(m[i*d:i*d+d], em[l*d:l*d+d])
+	}
+	raw := matrixbuf[d*d : 3*d*d]
+	im := matrixbuf[3*d*d : 4*d*d]
+	err2 := matrix(m).invert(raw, d, im)
+	if err2 != nil {
+		return nil, err2
+	}
+	e.inverseCache.Lock()
+	e.inverseCache.data[ikey] = im
+	e.inverseCache.Unlock()
+	g := matrixbuf[4*d*d:]
+	for i, l := range dLost {
+		copy(g[i*d:i*d+d], im[l*d:l*d+d])
+	}
+	return g, nil
+}
+
+func (e *encAVX2) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
+	d := e.data
+	em := e.encode
+	dCnt := len(dLost)
+	size := len(vects[has[0]])
+	if dCnt != 0 {
+		vtmp := make([][]byte, d+dCnt)
+		for i, p := range has {
+			vtmp[i] = vects[p]
+		}
+		for i, p := range dLost {
+			if len(vects[p]) == 0 {
+				vects[p] = make([]byte, size)
+			}
+			vtmp[i+d] = vects[p]
+		}
+		g, err2 := e.makeGen(has, dLost)
+		if err2 != nil {
+			return err2 // propagate the makeGen failure (a bare return would drop it)
+		}
+		etmp := &encAVX2{data: d, parity: dCnt, gen: g}
+		err2 = etmp.encodeGen(vtmp)
+		if err2 != nil {
+			return err2
+		}
+	}
+	if dataOnly {
+		return
+	}
+	pCnt := len(pLost)
+	if pCnt != 0 {
+		g := make([]byte, pCnt*d)
+		for i, l := range pLost {
+			copy(g[i*d:i*d+d], em[l*d:l*d+d])
+		}
+		vtmp := make([][]byte, d+pCnt)
+		for i := 0; i < d; i++ {
+			vtmp[i] = vects[i]
+		}
+		for i, p := range pLost {
+			if len(vects[p]) == 0 {
+				vects[p] = make([]byte, size)
+			}
+			vtmp[i+d] = vects[p]
+		}
+		etmp := &encAVX2{data: d, parity: pCnt, gen: g}
+		err2 := etmp.encodeGen(vtmp)
+		if err2 != nil {
+			return err2
+		}
+	}
+	return
+}
+
+func (e *encAVX2) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
+	d := e.data
+	p := e.parity
+	if len(has) != d {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	dCnt := len(dLost)
+	if dCnt > p {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	pCnt := len(pLost)
+	if pCnt > p {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	return e.reconst(vects, has, dLost, pLost, dataOnly)
+}
+
+func (e *encAVX2) reconstruct(vects [][]byte, dataOnly bool) (err error) {
+	d := e.data
+	p := e.parity
+	t := d + p
+	listBuf := make([]int, t+p)
+	has := listBuf[:d]
+	dLost := listBuf[d:t]
+	pLost := listBuf[t : t+p]
+	hasCnt, dCnt, pCnt := 0, 0, 0
+	for i := 0; i < t; i++ {
+		if vects[i] != nil {
+			if hasCnt < d {
+				has[hasCnt] = i
+				hasCnt++
+			}
+		} else {
+			if i < d {
+				if dCnt < p {
+					dLost[dCnt] = i
+					dCnt++
+				} else {
+					return errors.New("rs.Reconst: not enough vects")
+				}
+			} else {
+				if pCnt < p {
+					pLost[pCnt] = i
+					pCnt++
+				} else {
+					return errors.New("rs.Reconst: not enough vects")
+				}
+			}
+		}
+	}
+	if hasCnt != d {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	dLost = dLost[:dCnt]
+	pLost = pLost[:pCnt]
+	return e.reconst(vects, has, dLost, pLost, dataOnly)
+}
+
+func (e *encSSSE3) Encode(vects [][]byte) (err error) {
+	d := e.data
+	p := e.parity
+	size, err := checkEnc(d, p, vects)
+	if err != nil {
+		return
+	}
+	dv := vects[:d]
+	pv := vects[d:]
+	start, end := 0, 0
+	do := getDo(size)
+	for start < size {
+		end = start + do
+		if end <= size {
+			e.matrixMul(start, end, dv, pv)
+			start = end
+		} else {
+			e.matrixMulRemain(start, size, dv, pv)
+			start = size
+		}
+	}
+	return
+}
+
+//go:noescape
+func mulVectSSSE3(tbl, d, p []byte)
+
+//go:noescape
+func mulVectAddSSSE3(tbl, d, p []byte)
+
+func (e *encSSSE3) matrixMul(start, end int, dv, pv [][]byte) {
+	d := e.data
+	p := e.parity
+	tbl := e.tbl
+	off := 0
+	for i := 0; i < d; i++ {
+		for j := 0; j < p; j++ {
+			t := tbl[off : off+32]
+			if i != 0 {
+				mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
+			} else {
+				mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
+			}
+			off += 32
+		}
+	}
+}
+
+func (e *encSSSE3) matrixMulRemain(start, end int, dv, pv [][]byte) {
+	undone := end - start
+	do := (undone >> 4) << 4
+	d := e.data
+	p := e.parity
+	tbl := e.tbl
+	if do >= 16 {
+		end2 := start + do
+		off := 0
+		for i := 0; i < d; i++ {
+			for j := 0; j < p; j++ {
+				t := tbl[off : off+32]
+				if i != 0 {
+					mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
+				} else {
+					mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
+				}
+				off += 32
+			}
+		}
+		start = end
+	}
+	if undone > do {
+		start2 := end - 16
+		if start2 >= 0 {
+			off := 0
+			for i := 0; i < d; i++ {
+				for j := 0; j < p; j++ {
+					t := tbl[off : off+32]
+					if i != 0 {
+						mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
+					} else {
+						mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
+					}
+					off += 32
+				}
+			}
+		} else {
+			g := e.gen
+			for i := 0; i < d; i++ {
+				for j := 0; j < p; j++ {
+					if i != 0 {
+						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
+					} else {
+						mulVect(g[j*d], dv[0][start:], pv[j][start:])
+					}
+				}
+			}
+		}
+	}
+}
+
+// encodeGen uses the generator matrix instead of the precomputed tbls for encoding.
+// It is designed for reconstructing:
+// for small vects, initTbl costs too much time, so it is skipped;
+// for big vects, the tbls barely matter because the cache gets filled with the vects' data.
+func (e *encSSSE3) encodeGen(vects [][]byte) (err error) {
+	d := e.data
+	p := e.parity
+	size, err := checkEnc(d, p, vects)
+	if err != nil {
+		return
+	}
+	dv := vects[:d]
+	pv := vects[d:]
+	start, end := 0, 0
+	do := getDo(size)
+	for start < size {
+		end = start + do
+		if end <= size {
+			e.matrixMulGen(start, end, dv, pv)
+			start = end
+		} else {
+			e.matrixMulRemainGen(start, size, dv, pv)
+			start = size
+		}
+	}
+	return
+}
+
+func (e *encSSSE3) matrixMulGen(start, end int, dv, pv [][]byte) {
+	d := e.data
+	p := e.parity
+	g := e.gen
+	for i := 0; i < d; i++ {
+		for j := 0; j < p; j++ {
+			t := lowhighTbl[g[j*d+i]][:]
+			if i != 0 {
+				mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
+			} else {
+				mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
+			}
+		}
+	}
+}
+
+func (e *encSSSE3) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
+	undone := end - start
+	do := (undone >> 4) << 4
+	d := e.data
+	p := e.parity
+	g := e.gen
+	if do >= 16 {
+		end2 := start + do
+		for i := 0; i < d; i++ {
+			for j := 0; j < p; j++ {
+				t := lowhighTbl[g[j*d+i]][:]
+				if i != 0 {
+					mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
+				} else {
+					mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
+				}
+			}
+		}
+		start = end
+	}
+	if undone > do {
+		start2 := end - 16
+		if start2 >= 0 {
+			for i := 0; i < d; i++ {
+				for j := 0; j < p; j++ {
+					t := lowhighTbl[g[j*d+i]][:]
+					if i != 0 {
+						mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
+					} else {
+						mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
+					}
+				}
+			}
+		} else {
+			for i := 0; i < d; i++ {
+				for j := 0; j < p; j++ {
+					if i != 0 {
+						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
+					} else {
+						mulVect(g[j*d], dv[0][start:], pv[j][start:])
+					}
+				}
+			}
+		}
+	}
+}
+
+func (e *encSSSE3) Reconstruct(vects [][]byte) (err error) {
+	return e.reconstruct(vects, false)
+}
+
+func (e *encSSSE3) ReconstructData(vects [][]byte) (err error) {
+	return e.reconstruct(vects, true)
+}
+
+func (e *encSSSE3) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
+	return e.reconstWithPos(vects, has, dLost, pLost, false)
+}
+
+func (e *encSSSE3) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
+	return e.reconstWithPos(vects, has, dLost, nil, true)
+}
+
+func (e *encSSSE3) makeGen(has, dLost []int) (gen []byte, err error) {
+	d := e.data
+	em := e.encode
+	cnt := len(dLost)
+	if !e.enableCache {
+		matrixbuf := make([]byte, 4*d*d+cnt*d)
+		m := matrixbuf[:d*d]
+		for i, l := range has {
+			copy(m[i*d:i*d+d], em[l*d:l*d+d])
+		}
+		raw := matrixbuf[d*d : 3*d*d]
+		im := matrixbuf[3*d*d : 4*d*d]
+		err2 := matrix(m).invert(raw, d, im)
+		if err2 != nil {
+			return nil, err2
+		}
+		g := matrixbuf[4*d*d:]
+		for i, l := range dLost {
+			copy(g[i*d:i*d+d], im[l*d:l*d+d])
+		}
+		return g, nil
+	}
+	var ikey uint32
+	for _, p := range has {
+		ikey += 1 << uint8(p)
+	}
+	e.inverseCache.RLock()
+	v, ok := e.inverseCache.data[ikey]
+	if ok {
+		im := v
+		g := make([]byte, cnt*d)
+		for i, l := range dLost {
+			copy(g[i*d:i*d+d], im[l*d:l*d+d])
+		}
+		e.inverseCache.RUnlock()
+		return g, nil
+	}
+	e.inverseCache.RUnlock()
+	matrixbuf := make([]byte, 4*d*d+cnt*d)
+	m := matrixbuf[:d*d]
+	for i, l := range has {
+		copy(m[i*d:i*d+d], em[l*d:l*d+d])
+	}
+	raw := matrixbuf[d*d : 3*d*d]
+	im := matrixbuf[3*d*d : 4*d*d]
+	err2 := matrix(m).invert(raw, d, im)
+	if err2 != nil {
+		return nil, err2
+	}
+	e.inverseCache.Lock()
+	e.inverseCache.data[ikey] = im
+	e.inverseCache.Unlock()
+	g := matrixbuf[4*d*d:]
+	for i, l := range dLost {
+		copy(g[i*d:i*d+d], im[l*d:l*d+d])
+	}
+	return g, nil
+}
+
+func (e *encSSSE3) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
+	d := e.data
+	em := e.encode
+	dCnt := len(dLost)
+	size := len(vects[has[0]])
+	if dCnt != 0 {
+		vtmp := make([][]byte, d+dCnt)
+		for i, p := range has {
+			vtmp[i] = vects[p]
+		}
+		for i, p := range dLost {
+			if len(vects[p]) == 0 {
+				vects[p] = make([]byte, size)
+			}
+			vtmp[i+d] = vects[p]
+		}
+		g, err2 := e.makeGen(has, dLost)
+		if err2 != nil {
+			return err2 // propagate the makeGen failure (a bare return would drop it)
+		}
+		etmp := &encSSSE3{data: d, parity: dCnt, gen: g}
+		err2 = etmp.encodeGen(vtmp)
+		if err2 != nil {
+			return err2
+		}
+	}
+	if dataOnly {
+		return
+	}
+	pCnt := len(pLost)
+	if pCnt != 0 {
+		g := make([]byte, pCnt*d)
+		for i, l := range pLost {
+			copy(g[i*d:i*d+d], em[l*d:l*d+d])
+		}
+		vtmp := make([][]byte, d+pCnt)
+		for i := 0; i < d; i++ {
+			vtmp[i] = vects[i]
+		}
+		for i, p := range pLost {
+			if len(vects[p]) == 0 {
+				vects[p] = make([]byte, size)
+			}
+			vtmp[i+d] = vects[p]
+		}
+		etmp := &encSSSE3{data: d, parity: pCnt, gen: g}
+		err2 := etmp.encodeGen(vtmp)
+		if err2 != nil {
+			return err2
+		}
+	}
+	return
+}
+
+func (e *encSSSE3) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
+	d := e.data
+	p := e.parity
+	if len(has) != d {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	dCnt := len(dLost)
+	if dCnt > p {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	pCnt := len(pLost)
+	if pCnt > p {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	return e.reconst(vects, has, dLost, pLost, dataOnly)
+}
+
+func (e *encSSSE3) reconstruct(vects [][]byte, dataOnly bool) (err error) {
+	d := e.data
+	p := e.parity
+	t := d + p
+	listBuf := make([]int, t+p)
+	has := listBuf[:d]
+	dLost := listBuf[d:t]
+	pLost := listBuf[t : t+p]
+	hasCnt, dCnt, pCnt := 0, 0, 0
+	for i := 0; i < t; i++ {
+		if vects[i] != nil {
+			if hasCnt < d {
+				has[hasCnt] = i
+				hasCnt++
+			}
+		} else {
+			if i < d {
+				if dCnt < p {
+					dLost[dCnt] = i
+					dCnt++
+				} else {
+					return errors.New("rs.Reconst: not enough vects")
+				}
+			} else {
+				if pCnt < p {
+					pLost[pCnt] = i
+					pCnt++
+				} else {
+					return errors.New("rs.Reconst: not enough vects")
+				}
+			}
+		}
+	}
+	if hasCnt != d {
+		return errors.New("rs.Reconst: not enough vects")
+	}
+	dLost = dLost[:dCnt]
+	pLost = pLost[:pCnt]
+	return e.reconst(vects, has, dLost, pLost, dataOnly)
+}

+ 401 - 0
vendor/github.com/templexxx/reedsolomon/rs_amd64.s

@@ -0,0 +1,401 @@
+// Reference: www.ssrc.ucsc.edu/Papers/plank-fast13.pdf
+
+#include "textflag.h"
+
+#define low_tbl Y0
+#define high_tbl Y1
+#define mask Y2
+#define in0  Y3
+#define in1  Y4
+#define in2  Y5
+#define in3  Y6
+#define in4  Y7
+#define in5  Y8
+#define in0_h  Y10
+#define in1_h  Y11
+#define in2_h  Y12
+#define in3_h  Y13
+#define in4_h  Y14
+#define in5_h  Y15
+
+#define in  BX
+#define out DI
+#define len R8
+#define pos R9
+
+#define tmp0 R10
+
+#define low_tblx X0
+#define high_tblx X1
+#define maskx X2
+#define in0x X3
+#define in0_hx X10
+#define tmp0x  X9
+#define tmp1x  X11
+#define tmp2x  X12
+#define tmp3x  X13
+
+
+// func mulVectAVX2(tbl, d, p []byte)
+TEXT ·mulVectAVX2(SB), NOSPLIT, $0
+    MOVQ         i+24(FP), in
+	MOVQ         o+48(FP), out
+	MOVQ         tbl+0(FP), tmp0
+	VMOVDQU      (tmp0), low_tblx
+	VMOVDQU      16(tmp0), high_tblx
+	MOVB         $0x0f, DX
+	LONG         $0x2069e3c4; WORD $0x00d2   // VPINSRB $0x00, EDX, XMM2, XMM2
+	VPBROADCASTB maskx, maskx
+	MOVQ         in_len+32(FP), len
+	TESTQ        $31, len
+	JNZ          one16b
+
+ymm:
+    VINSERTI128  $1, low_tblx, low_tbl, low_tbl
+    VINSERTI128  $1, high_tblx, high_tbl, high_tbl
+    VINSERTI128  $1, maskx, mask, mask
+    TESTQ        $255, len
+    JNZ          not_aligned
+
+// 256bytes/loop
+aligned:
+    MOVQ         $0, pos
+
+loop256b:
+	VMOVDQU (in)(pos*1), in0
+	VPSRLQ  $4, in0, in0_h
+	VPAND   mask, in0_h, in0_h
+	VPAND   mask, in0, in0
+	VPSHUFB in0_h, high_tbl, in0_h
+	VPSHUFB in0, low_tbl, in0
+	VPXOR   in0, in0_h, in0
+	VMOVDQU in0, (out)(pos*1)
+
+    VMOVDQU 32(in)(pos*1), in1
+	VPSRLQ  $4, in1, in1_h
+	VPAND   mask, in1_h, in1_h
+	VPAND   mask, in1, in1
+	VPSHUFB in1_h, high_tbl, in1_h
+	VPSHUFB in1, low_tbl, in1
+	VPXOR   in1, in1_h, in1
+	VMOVDQU in1, 32(out)(pos*1)
+
+    VMOVDQU 64(in)(pos*1), in2
+	VPSRLQ  $4, in2, in2_h
+	VPAND   mask, in2_h, in2_h
+	VPAND   mask, in2, in2
+	VPSHUFB in2_h, high_tbl, in2_h
+	VPSHUFB in2, low_tbl, in2
+	VPXOR   in2, in2_h, in2
+	VMOVDQU in2, 64(out)(pos*1)
+
+    VMOVDQU 96(in)(pos*1), in3
+	VPSRLQ  $4, in3, in3_h
+	VPAND   mask, in3_h, in3_h
+	VPAND   mask, in3, in3
+	VPSHUFB in3_h, high_tbl, in3_h
+	VPSHUFB in3, low_tbl, in3
+	VPXOR   in3, in3_h, in3
+	VMOVDQU in3, 96(out)(pos*1)
+
+    VMOVDQU 128(in)(pos*1), in4
+	VPSRLQ  $4, in4, in4_h
+	VPAND   mask, in4_h, in4_h
+	VPAND   mask, in4, in4
+	VPSHUFB in4_h, high_tbl, in4_h
+	VPSHUFB in4, low_tbl, in4
+	VPXOR   in4, in4_h, in4
+	VMOVDQU in4, 128(out)(pos*1)
+
+    VMOVDQU 160(in)(pos*1), in5
+	VPSRLQ  $4, in5, in5_h
+	VPAND   mask, in5_h, in5_h
+	VPAND   mask, in5, in5
+	VPSHUFB in5_h, high_tbl, in5_h
+	VPSHUFB in5, low_tbl, in5
+	VPXOR   in5, in5_h, in5
+	VMOVDQU in5, 160(out)(pos*1)
+
+    VMOVDQU 192(in)(pos*1), in0
+	VPSRLQ  $4, in0, in0_h
+	VPAND   mask, in0_h, in0_h
+	VPAND   mask, in0, in0
+	VPSHUFB in0_h, high_tbl, in0_h
+	VPSHUFB in0, low_tbl, in0
+	VPXOR   in0, in0_h, in0
+	VMOVDQU in0, 192(out)(pos*1)
+
+    VMOVDQU 224(in)(pos*1), in1
+	VPSRLQ  $4, in1, in1_h
+	VPAND   mask, in1_h, in1_h
+	VPAND   mask, in1, in1
+	VPSHUFB in1_h, high_tbl, in1_h
+	VPSHUFB in1, low_tbl, in1
+	VPXOR   in1, in1_h, in1
+	VMOVDQU in1, 224(out)(pos*1)
+
+	ADDQ    $256, pos
+	CMPQ    len, pos
+	JNE     loop256b
+	VZEROUPPER
+	RET
+
+not_aligned:
+    MOVQ    len, tmp0
+    ANDQ    $255, tmp0
+
+loop32b:
+    VMOVDQU -32(in)(len*1), in0
+	VPSRLQ  $4, in0, in0_h
+	VPAND   mask, in0_h, in0_h
+	VPAND   mask, in0, in0
+	VPSHUFB in0_h, high_tbl, in0_h
+	VPSHUFB in0, low_tbl, in0
+	VPXOR   in0, in0_h, in0
+	VMOVDQU in0, -32(out)(len*1)
+	SUBQ    $32, len
+	SUBQ    $32, tmp0
+	JG      loop32b
+	CMPQ    len, $256
+	JGE     aligned
+	VZEROUPPER
+	RET
+
+one16b:
+    VMOVDQU  -16(in)(len*1), in0x
+    VPSRLQ   $4, in0x, in0_hx
+    VPAND    maskx, in0x, in0x
+    VPAND    maskx, in0_hx, in0_hx
+    VPSHUFB  in0_hx, high_tblx, in0_hx
+    VPSHUFB  in0x, low_tblx, in0x
+    VPXOR    in0x, in0_hx, in0x
+	VMOVDQU  in0x, -16(out)(len*1)
+	SUBQ     $16, len
+	CMPQ     len, $0
+	JNE      ymm
+	RET
+
+// func mulVectAddAVX2(tbl, d, p []byte)
+TEXT ·mulVectAddAVX2(SB), NOSPLIT, $0
+	MOVQ         d+24(FP), in
+	MOVQ         p+48(FP), out
+	MOVQ         tbl+0(FP), tmp0
+	VMOVDQU      (tmp0), low_tblx
+	VMOVDQU      16(tmp0), high_tblx
+	MOVB         $0x0f, DX
+	LONG         $0x2069e3c4; WORD $0x00d2   // VPINSRB $0x00, EDX, XMM2, XMM2
+	VPBROADCASTB maskx, maskx
+	MOVQ         d_len+32(FP), len
+	TESTQ        $31, len
+	JNZ          one16b
+
+ymm:
+    VINSERTI128  $1, low_tblx, low_tbl, low_tbl
+    VINSERTI128  $1, high_tblx, high_tbl, high_tbl
+    VINSERTI128  $1, maskx, mask, mask
+    TESTQ        $255, len
+    JNZ          not_aligned
+
+aligned:
+    MOVQ         $0, pos
+
+loop256b:
+    VMOVDQU (in)(pos*1), in0
+	VPSRLQ  $4, in0, in0_h
+	VPAND   mask, in0_h, in0_h
+	VPAND   mask, in0, in0
+	VPSHUFB in0_h, high_tbl, in0_h
+	VPSHUFB in0, low_tbl, in0
+	VPXOR   in0, in0_h, in0
+	VPXOR   (out)(pos*1), in0, in0
+	VMOVDQU in0, (out)(pos*1)
+
+    VMOVDQU 32(in)(pos*1), in1
+	VPSRLQ  $4, in1, in1_h
+	VPAND   mask, in1_h, in1_h
+	VPAND   mask, in1, in1
+	VPSHUFB in1_h, high_tbl, in1_h
+	VPSHUFB in1, low_tbl, in1
+	VPXOR   in1, in1_h, in1
+	VPXOR   32(out)(pos*1), in1, in1
+	VMOVDQU in1, 32(out)(pos*1)
+
+    VMOVDQU 64(in)(pos*1), in2
+	VPSRLQ  $4, in2, in2_h
+	VPAND   mask, in2_h, in2_h
+	VPAND   mask, in2, in2
+	VPSHUFB in2_h, high_tbl, in2_h
+	VPSHUFB in2, low_tbl, in2
+	VPXOR   in2, in2_h, in2
+	VPXOR   64(out)(pos*1), in2, in2
+	VMOVDQU in2, 64(out)(pos*1)
+
+    VMOVDQU 96(in)(pos*1), in3
+	VPSRLQ  $4, in3, in3_h
+	VPAND   mask, in3_h, in3_h
+	VPAND   mask, in3, in3
+	VPSHUFB in3_h, high_tbl, in3_h
+	VPSHUFB in3, low_tbl, in3
+	VPXOR   in3, in3_h, in3
+	VPXOR   96(out)(pos*1), in3, in3
+	VMOVDQU in3, 96(out)(pos*1)
+
+    VMOVDQU 128(in)(pos*1), in4
+	VPSRLQ  $4, in4, in4_h
+	VPAND   mask, in4_h, in4_h
+	VPAND   mask, in4, in4
+	VPSHUFB in4_h, high_tbl, in4_h
+	VPSHUFB in4, low_tbl, in4
+	VPXOR   in4, in4_h, in4
+	VPXOR   128(out)(pos*1), in4, in4
+	VMOVDQU in4, 128(out)(pos*1)
+
+    VMOVDQU 160(in)(pos*1), in5
+	VPSRLQ  $4, in5, in5_h
+	VPAND   mask, in5_h, in5_h
+	VPAND   mask, in5, in5
+	VPSHUFB in5_h, high_tbl, in5_h
+	VPSHUFB in5, low_tbl, in5
+	VPXOR   in5, in5_h, in5
+	VPXOR   160(out)(pos*1), in5, in5
+	VMOVDQU in5, 160(out)(pos*1)
+
+    VMOVDQU 192(in)(pos*1), in0
+	VPSRLQ  $4, in0, in0_h
+	VPAND   mask, in0_h, in0_h
+	VPAND   mask, in0, in0
+	VPSHUFB in0_h, high_tbl, in0_h
+	VPSHUFB in0, low_tbl, in0
+	VPXOR   in0, in0_h, in0
+	VPXOR   192(out)(pos*1), in0, in0
+	VMOVDQU in0, 192(out)(pos*1)
+
+    VMOVDQU 224(in)(pos*1), in1
+	VPSRLQ  $4, in1, in1_h
+	VPAND   mask, in1_h, in1_h
+	VPAND   mask, in1, in1
+	VPSHUFB in1_h, high_tbl, in1_h
+	VPSHUFB in1, low_tbl, in1
+	VPXOR   in1, in1_h, in1
+	VPXOR   224(out)(pos*1), in1, in1
+	VMOVDQU in1, 224(out)(pos*1)
+
+	ADDQ    $256, pos
+	CMPQ    len, pos
+	JNE     loop256b
+	VZEROUPPER
+	RET
+
+not_aligned:
+    MOVQ    len, tmp0
+    ANDQ    $255, tmp0
+
+loop32b:
+    VMOVDQU -32(in)(len*1), in0
+	VPSRLQ  $4, in0, in0_h
+	VPAND   mask, in0_h, in0_h
+	VPAND   mask, in0, in0
+	VPSHUFB in0_h, high_tbl, in0_h
+	VPSHUFB in0, low_tbl, in0
+	VPXOR   in0, in0_h, in0
+	VPXOR   -32(out)(len*1), in0, in0
+	VMOVDQU in0, -32(out)(len*1)
+	SUBQ    $32, len
+	SUBQ    $32, tmp0
+	JG      loop32b
+	CMPQ    len, $256
+	JGE     aligned
+	VZEROUPPER
+	RET
+
+one16b:
+    VMOVDQU  -16(in)(len*1), in0x
+    VPSRLQ   $4, in0x, in0_hx
+    VPAND    maskx, in0x, in0x
+    VPAND    maskx, in0_hx, in0_hx
+    VPSHUFB  in0_hx, high_tblx, in0_hx
+    VPSHUFB  in0x, low_tblx, in0x
+    VPXOR    in0x, in0_hx, in0x
+    VPXOR    -16(out)(len*1), in0x, in0x
+	VMOVDQU  in0x, -16(out)(len*1)
+	SUBQ     $16, len
+	CMPQ     len, $0
+	JNE      ymm
+	RET
+
+// func mulVectSSSE3(tbl, d, p []byte)
+TEXT ·mulVectSSSE3(SB), NOSPLIT, $0
+	MOVQ    d+24(FP), in
+	MOVQ    p+48(FP), out
+	MOVQ    tbl+0(FP), tmp0
+	MOVOU   (tmp0), low_tblx
+	MOVOU   16(tmp0), high_tblx
+	MOVB    $15, tmp0
+	MOVQ    tmp0, maskx
+	PXOR    tmp0x, tmp0x
+	PSHUFB  tmp0x, maskx
+	MOVQ    d_len+32(FP), len
+	SHRQ    $4, len
+
+loop:
+	MOVOU  (in), in0x
+	MOVOU  in0x, in0_hx
+	PSRLQ  $4, in0_hx
+	PAND   maskx, in0x
+	PAND   maskx, in0_hx
+	MOVOU  low_tblx, tmp1x
+	MOVOU  high_tblx, tmp2x
+	PSHUFB in0x, tmp1x
+	PSHUFB in0_hx, tmp2x
+	PXOR   tmp1x, tmp2x
+	MOVOU  tmp2x, (out)
+	ADDQ   $16, in
+	ADDQ   $16, out
+	SUBQ   $1, len
+	JNZ    loop
+	RET
+
+// func mulVectAddSSSE3(tbl, d, p []byte)
+TEXT ·mulVectAddSSSE3(SB), NOSPLIT, $0
+	MOVQ    d+24(FP), in
+	MOVQ    p+48(FP), out
+	MOVQ    tbl+0(FP), tmp0
+	MOVOU   (tmp0), low_tblx
+	MOVOU   16(tmp0), high_tblx
+	MOVB    $15, tmp0
+	MOVQ    tmp0, maskx
+	PXOR    tmp0x, tmp0x
+	PSHUFB  tmp0x, maskx
+	MOVQ    d_len+32(FP), len
+	SHRQ    $4, len
+
+loop:
+	MOVOU  (in), in0x
+	MOVOU  in0x, in0_hx
+	PSRLQ  $4, in0_hx
+	PAND   maskx, in0x
+	PAND   maskx, in0_hx
+	MOVOU  low_tblx, tmp1x
+	MOVOU  high_tblx, tmp2x
+	PSHUFB in0x, tmp1x
+	PSHUFB in0_hx, tmp2x
+	PXOR   tmp1x, tmp2x
+	MOVOU  (out), tmp3x
+	PXOR   tmp3x, tmp2x
+	MOVOU  tmp2x, (out)
+	ADDQ   $16, in
+	ADDQ   $16, out
+	SUBQ   $1, len
+	JNZ    loop
+	RET
+
+// func copy32B(dst, src []byte)
+TEXT ·copy32B(SB), NOSPLIT, $0
+    MOVQ dst+0(FP), SI
+    MOVQ src+24(FP), DX
+    MOVOU (DX), X0
+    MOVOU 16(DX), X1
+    MOVOU X0, (SI)
+    MOVOU X1, 16(SI)
+    RET

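The mulVect routines above (AVX2 and SSSE3 alike) implement GF(2^8) multiplication of a whole vector through two 16-entry nibble lookup tables: the first 16 bytes of `tbl` are indexed by each byte's low nibble, the next 16 by its high nibble, and `PSHUFB`/`VPSHUFB` performs 16 or 32 such lookups per instruction. A minimal scalar sketch of the same technique (the function names are illustrative, and it assumes the 32-byte table layout the assembly reads, low-nibble table first):

```go
// mulVectGeneric models mulVectAVX2: p[i] = low[d[i]&0x0f] ^ high[d[i]>>4],
// where low/high are precomputed lookup tables for one fixed GF(2^8) multiplier.
func mulVectGeneric(tbl, d, p []byte) {
	low, high := tbl[:16], tbl[16:32]
	for i, b := range d {
		p[i] = low[b&0x0f] ^ high[b>>4]
	}
}

// mulVectAddGeneric models mulVectAddAVX2: the same lookup, XOR-accumulated
// into p (the extra VPXOR against (out) in the assembly).
func mulVectAddGeneric(tbl, d, p []byte) {
	low, high := tbl[:16], tbl[16:32]
	for i, b := range d {
		p[i] ^= low[b&0x0f] ^ high[b>>4]
	}
}
```
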
+ 8 - 0
vendor/github.com/templexxx/reedsolomon/rs_other.go

@@ -0,0 +1,8 @@
+// +build !amd64
+
+package reedsolomon
+
+// newRS returns the pure Go encoder implementation for non-amd64 platforms.
+func newRS(d, p int, em matrix) (enc Encoder) {
+	g := em[d*d:]
+	return &encBase{data: d, parity: p, encode: em, gen: g}
+}

File diff suppressed because it is too large
+ 37 - 0
vendor/github.com/templexxx/reedsolomon/tbl.go


+ 1 - 0
vendor/github.com/templexxx/xor/.gitattributes

@@ -0,0 +1 @@
+*.s linguist-language=go

+ 18 - 0
vendor/github.com/templexxx/xor/.gitignore

@@ -0,0 +1,18 @@
+# Binaries for programs and plugins
+*.exe
+*.dll
+*.so
+*.dylib
+
+# Test binary, build with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
+.glide/
+/backup/
+/backup2/
+/.idea
+/backup3/

+ 21 - 0
vendor/github.com/templexxx/xor/LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Temple3x
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 48 - 0
vendor/github.com/templexxx/xor/README.md

@@ -0,0 +1,48 @@
+# XOR
+
+XOR code engine in pure Go.
+
+More than 10GB/s per core.
+
+## Introduction
+
+1. Uses SIMD (SSE2 or AVX2) for speed-up
+2. ...
+
+## Installation
+To get the package, use the standard:
+```bash
+go get github.com/templexxx/xor
+```
+
+## Documentation
+
+See the associated [GoDoc](http://godoc.org/github.com/templexxx/xor)
+
+
+## Performance
+
+Performance depends mainly on:
+
+1. SIMD extension
+2. unit size of the worker
+3. hardware (CPU, RAM, etc.)
+
+Example of performance on my mid-2014 MacBook (i5-4278U, 2.6GHz, 2 physical cores), with shard sizes ranging from 1KB to 16MB.
+```
+speed = ( shards * size ) / cost
+```
+| data_shards | shard_size | speed (MB/S) |
+|-------------|------------|--------------|
+| 2           | 1KB        | 64127.95     |
+| 2           | 1400B      | 59657.55     |
+| 2           | 16KB       | 35370.84     |
+| 2           | 16MB       | 12128.95     |
+| 5           | 1KB        | 78837.33     |
+| 5           | 1400B      | 58054.89     |
+| 5           | 16KB       | 50161.19     |
+| 5           | 16MB       | 12750.41     |
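+
+For example, rearranging the formula for the last row (5 shards of 16MB each at
+12750.41 MB/S) gives an illustrative per-pass cost, not a measured number:
+
+```
+cost = (shards * size) / speed = (5 * 16MB) / 12750.41 MB/S ≈ 6.3 ms
+```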
+
+## Who is using this?
+
+1. https://github.com/xtaci/kcp-go -- A Production-Grade Reliable-UDP Library for golang

+ 438 - 0
vendor/github.com/templexxx/xor/avx2_amd64.s

@@ -0,0 +1,438 @@
+#include "textflag.h"
+
+// addr of mem
+#define DST BX
+#define SRC SI
+#define SRC0 TMP4
+#define SRC1 TMP5
+
+// loop args
+// num of vect
+#define VECT CX
+#define LEN DX
+// pos of matrix
+#define POS R8
+
+// tmp store
+// num of vect or ...
+#define TMP1 R9
+// pos of matrix or ...
+#define TMP2 R10
+// store addr of data/parity or ...
+#define TMP3 R11
+#define TMP4 R12
+#define TMP5 R13
+#define TMP6 R14
+
+// func bytesAVX2mini(dst, src0, src1 []byte, size int)
+TEXT ·bytesAVX2mini(SB), NOSPLIT, $0
+	MOVQ  len+72(FP), LEN
+	CMPQ  LEN, $0
+	JE    ret
+	MOVQ  dst+0(FP), DST
+	MOVQ  src0+24(FP), SRC0
+	MOVQ  src1+48(FP), SRC1
+	TESTQ $31, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop32b:
+	VMOVDQU (SRC0)(POS*1), Y0
+	VPXOR   (SRC1)(POS*1), Y0, Y0
+	VMOVDQU Y0, (DST)(POS*1)
+	ADDQ    $32, POS
+	CMPQ    LEN, POS
+	JNE     loop32b
+	VZEROUPPER
+	RET
+
+loop_1b:
+	MOVB  -1(SRC0)(LEN*1), TMP1
+	MOVB  -1(SRC1)(LEN*1), TMP2
+	XORB  TMP1, TMP2
+	MOVB  TMP2, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $31, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP1
+	ANDQ  $31, TMP1
+
+loop_8b:
+	MOVQ -8(SRC0)(LEN*1), TMP2
+	MOVQ -8(SRC1)(LEN*1), TMP3
+	XORQ TMP2, TMP3
+	MOVQ TMP3, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP1
+	JG   loop_8b
+
+	CMPQ LEN, $32
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func bytesAVX2small(dst, src0, src1 []byte, size int)
+TEXT ·bytesAVX2small(SB), NOSPLIT, $0
+	MOVQ  len+72(FP), LEN
+	CMPQ  LEN, $0
+	JE    ret
+	MOVQ  dst+0(FP), DST
+	MOVQ  src0+24(FP), SRC0
+	MOVQ  src1+48(FP), SRC1
+	TESTQ $127, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop128b:
+	VMOVDQU (SRC0)(POS*1), Y0
+	VMOVDQU 32(SRC0)(POS*1), Y1
+	VMOVDQU 64(SRC0)(POS*1), Y2
+	VMOVDQU 96(SRC0)(POS*1), Y3
+	VPXOR   (SRC1)(POS*1), Y0, Y0
+	VPXOR   32(SRC1)(POS*1), Y1, Y1
+	VPXOR   64(SRC1)(POS*1), Y2, Y2
+	VPXOR   96(SRC1)(POS*1), Y3, Y3
+	VMOVDQU Y0, (DST)(POS*1)
+	VMOVDQU Y1, 32(DST)(POS*1)
+	VMOVDQU Y2, 64(DST)(POS*1)
+	VMOVDQU Y3, 96(DST)(POS*1)
+
+	ADDQ $128, POS
+	CMPQ LEN, POS
+	JNE  loop128b
+	VZEROUPPER
+	RET
+
+loop_1b:
+	MOVB  -1(SRC0)(LEN*1), TMP1
+	MOVB  -1(SRC1)(LEN*1), TMP2
+	XORB  TMP1, TMP2
+	MOVB  TMP2, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $127, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP1
+	ANDQ  $127, TMP1
+
+loop_8b:
+	MOVQ -8(SRC0)(LEN*1), TMP2
+	MOVQ -8(SRC1)(LEN*1), TMP3
+	XORQ TMP2, TMP3
+	MOVQ TMP3, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP1
+	JG   loop_8b
+
+	CMPQ LEN, $128
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func bytesAVX2big(dst, src0, src1 []byte, size int)
+TEXT ·bytesAVX2big(SB), NOSPLIT, $0
+	MOVQ  len+72(FP), LEN
+	CMPQ  LEN, $0
+	JE    ret
+	MOVQ  dst+0(FP), DST
+	MOVQ  src0+24(FP), SRC0
+	MOVQ  src1+48(FP), SRC1
+	TESTQ $127, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop128b:
+	VMOVDQU (SRC0)(POS*1), Y0
+	VMOVDQU 32(SRC0)(POS*1), Y1
+	VMOVDQU 64(SRC0)(POS*1), Y2
+	VMOVDQU 96(SRC0)(POS*1), Y3
+	VPXOR   (SRC1)(POS*1), Y0, Y0
+	VPXOR   32(SRC1)(POS*1), Y1, Y1
+	VPXOR   64(SRC1)(POS*1), Y2, Y2
+	VPXOR   96(SRC1)(POS*1), Y3, Y3
+	LONG    $0xe77da1c4; WORD $0x0304             // VMOVNTDQ Y0, (DST)(POS*1)
+	LONG    $0xe77da1c4; WORD $0x034c; BYTE $0x20 // VMOVNTDQ Y1, 32(DST)(POS*1)
+	LONG    $0xe77da1c4; WORD $0x0354; BYTE $0x40 // VMOVNTDQ Y2, 64(DST)(POS*1)
+	LONG    $0xe77da1c4; WORD $0x035c; BYTE $0x60 // VMOVNTDQ Y3, 96(DST)(POS*1)
+
+	ADDQ $128, POS
+	CMPQ LEN, POS
+	JNE  loop128b
+	SFENCE
+	VZEROUPPER
+	RET
+
+loop_1b:
+	MOVB  -1(SRC0)(LEN*1), TMP1
+	MOVB  -1(SRC1)(LEN*1), TMP2
+	XORB  TMP1, TMP2
+	MOVB  TMP2, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $127, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP1
+	ANDQ  $127, TMP1
+
+loop_8b:
+	MOVQ -8(SRC0)(LEN*1), TMP2
+	MOVQ -8(SRC1)(LEN*1), TMP3
+	XORQ TMP2, TMP3
+	MOVQ TMP3, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP1
+	JG   loop_8b
+
+	CMPQ LEN, $128
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func matrixAVX2small(dst []byte, src [][]byte)
+TEXT ·matrixAVX2small(SB), NOSPLIT, $0
+	MOVQ  dst+0(FP), DST
+	MOVQ  src+24(FP), SRC
+	MOVQ  vec+32(FP), VECT
+	MOVQ  len+8(FP), LEN
+	TESTQ $127, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop128b:
+	MOVQ    VECT, TMP1
+	SUBQ    $2, TMP1
+	MOVQ    $0, TMP2
+	MOVQ    (SRC)(TMP2*1), TMP3
+	MOVQ    TMP3, TMP4
+	VMOVDQU (TMP3)(POS*1), Y0
+	VMOVDQU 32(TMP4)(POS*1), Y1
+	VMOVDQU 64(TMP3)(POS*1), Y2
+	VMOVDQU 96(TMP4)(POS*1), Y3
+
+next_vect:
+	ADDQ    $24, TMP2
+	MOVQ    (SRC)(TMP2*1), TMP3
+	MOVQ    TMP3, TMP4
+	VMOVDQU (TMP3)(POS*1), Y4
+	VMOVDQU 32(TMP4)(POS*1), Y5
+	VMOVDQU 64(TMP3)(POS*1), Y6
+	VMOVDQU 96(TMP4)(POS*1), Y7
+	VPXOR   Y4, Y0, Y0
+	VPXOR   Y5, Y1, Y1
+	VPXOR   Y6, Y2, Y2
+	VPXOR   Y7, Y3, Y3
+	SUBQ    $1, TMP1
+	JGE     next_vect
+
+	VMOVDQU Y0, (DST)(POS*1)
+	VMOVDQU Y1, 32(DST)(POS*1)
+	VMOVDQU Y2, 64(DST)(POS*1)
+	VMOVDQU Y3, 96(DST)(POS*1)
+
+	ADDQ $128, POS
+	CMPQ LEN, POS
+	JNE  loop128b
+	VZEROUPPER
+	RET
+
+loop_1b:
+	MOVQ VECT, TMP1
+	MOVQ $0, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	SUBQ $2, TMP1
+	MOVB -1(TMP3)(LEN*1), TMP5
+
+next_vect_1b:
+	ADDQ $24, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	MOVB -1(TMP3)(LEN*1), TMP6
+	XORB TMP6, TMP5
+	SUBQ $1, TMP1
+	JGE  next_vect_1b
+
+	MOVB  TMP5, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $127, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP4
+	ANDQ  $127, TMP4
+
+loop_8b:
+	MOVQ VECT, TMP1
+	MOVQ $0, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	SUBQ $2, TMP1
+	MOVQ -8(TMP3)(LEN*1), TMP5
+
+next_vect_8b:
+	ADDQ $24, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	MOVQ -8(TMP3)(LEN*1), TMP6
+	XORQ TMP6, TMP5
+	SUBQ $1, TMP1
+	JGE  next_vect_8b
+
+	MOVQ TMP5, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP4
+	JG   loop_8b
+
+	CMPQ LEN, $128
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func matrixAVX2big(dst []byte, src [][]byte)
+TEXT ·matrixAVX2big(SB), NOSPLIT, $0
+	MOVQ  dst+0(FP), DST
+	MOVQ  src+24(FP), SRC
+	MOVQ  vec+32(FP), VECT
+	MOVQ  len+8(FP), LEN
+	TESTQ $127, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop128b:
+	MOVQ    VECT, TMP1
+	SUBQ    $2, TMP1
+	MOVQ    $0, TMP2
+	MOVQ    (SRC)(TMP2*1), TMP3
+	MOVQ    TMP3, TMP4
+	VMOVDQU (TMP3)(POS*1), Y0
+	VMOVDQU 32(TMP4)(POS*1), Y1
+	VMOVDQU 64(TMP3)(POS*1), Y2
+	VMOVDQU 96(TMP4)(POS*1), Y3
+
+next_vect:
+	ADDQ    $24, TMP2
+	MOVQ    (SRC)(TMP2*1), TMP3
+	MOVQ    TMP3, TMP4
+	VMOVDQU (TMP3)(POS*1), Y4
+	VMOVDQU 32(TMP4)(POS*1), Y5
+	VMOVDQU 64(TMP3)(POS*1), Y6
+	VMOVDQU 96(TMP4)(POS*1), Y7
+	VPXOR   Y4, Y0, Y0
+	VPXOR   Y5, Y1, Y1
+	VPXOR   Y6, Y2, Y2
+	VPXOR   Y7, Y3, Y3
+	SUBQ    $1, TMP1
+	JGE     next_vect
+
+	LONG $0xe77da1c4; WORD $0x0304             // VMOVNTDQ Y0, (DST)(POS*1) (non-temporal; go1.8 adds the mnemonic)
+	LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20 // VMOVNTDQ Y1, 32(DST)(POS*1)
+	LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40 // VMOVNTDQ Y2, 64(DST)(POS*1)
+	LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60 // VMOVNTDQ Y3, 96(DST)(POS*1)
+
+	ADDQ $128, POS
+	CMPQ LEN, POS
+	JNE  loop128b
+	VZEROUPPER
+	RET
+
+loop_1b:
+	MOVQ VECT, TMP1
+	MOVQ $0, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	SUBQ $2, TMP1
+	MOVB -1(TMP3)(LEN*1), TMP5
+
+next_vect_1b:
+	ADDQ $24, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	MOVB -1(TMP3)(LEN*1), TMP6
+	XORB TMP6, TMP5
+	SUBQ $1, TMP1
+	JGE  next_vect_1b
+
+	MOVB  TMP5, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $127, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP4
+	ANDQ  $127, TMP4
+
+loop_8b:
+	MOVQ VECT, TMP1
+	MOVQ $0, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	SUBQ $2, TMP1
+	MOVQ -8(TMP3)(LEN*1), TMP5
+
+next_vect_8b:
+	ADDQ $24, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	MOVQ -8(TMP3)(LEN*1), TMP6
+	XORQ TMP6, TMP5
+	SUBQ $1, TMP1
+	JGE  next_vect_8b
+
+	MOVQ TMP5, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP4
+	JG   loop_8b
+
+	CMPQ LEN, $128
+	JGE  aligned
+	RET
+
+ret:
+	RET
+

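The mini/small/big variants above share one control-flow pattern: peel single bytes off the tail until the remaining length is 8-byte aligned, peel 8-byte words until it is a multiple of the wide-loop stride, then run the SIMD loop over the aligned prefix. A rough Go model of that structure (the function name is illustrative; the 128-byte stride matches bytesAVX2small):

```go
// xorTailThenBulk mirrors the not_aligned / loop_8b / aligned structure above.
func xorTailThenBulk(dst, src0, src1 []byte, n int) {
	for n%8 != 0 { // loop_1b: single bytes from the tail
		n--
		dst[n] = src0[n] ^ src1[n]
	}
	for n%128 != 0 { // loop_8b: 8-byte words from the tail
		for i := n - 8; i < n; i++ {
			dst[i] = src0[i] ^ src1[i]
		}
		n -= 8
	}
	for i := 0; i < n; i++ { // aligned: stand-in for the 128-byte SIMD loop
		dst[i] = src0[i] ^ src1[i]
	}
}
```
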
+ 116 - 0
vendor/github.com/templexxx/xor/nosimd.go

@@ -0,0 +1,116 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package xor
+
+import (
+	"runtime"
+	"unsafe"
+)
+
+const wordSize = int(unsafe.Sizeof(uintptr(0)))
+const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
+
+// bytesNoSIMD XORs the bytes in a and b. The destination is assumed to have enough space.
+func bytesNoSIMD(dst, a, b []byte, size int) {
+	if supportsUnaligned {
+		fastXORBytes(dst, a, b, size)
+	} else {
+		// TODO(hanwen): if (dst, a, b) have common alignment
+		// we could still try fastXORBytes. It is not clear
+		// how often this happens, and it's only worth it if
+		// the block encryption itself is hardware
+		// accelerated.
+		safeXORBytes(dst, a, b, size)
+	}
+}
+
+// split the slice into units of this size for cache-friendly processing
+const unitSize = 16 * 1024
+
+func matrixNoSIMD(dst []byte, src [][]byte) {
+	size := len(src[0])
+	start := 0
+	do := unitSize
+	for start < size {
+		end := start + do
+		if end <= size {
+			partNoSIMD(start, end, dst, src)
+			start = start + do
+		} else {
+			partNoSIMD(start, size, dst, src)
+			start = size
+		}
+	}
+}
+
+// partNoSIMD processes one unit; splitting the vectors this way improves performance on big data by reducing cache pollution
+func partNoSIMD(start, end int, dst []byte, src [][]byte) {
+	bytesNoSIMD(dst[start:end], src[0][start:end], src[1][start:end], end-start)
+	for i := 2; i < len(src); i++ {
+		bytesNoSIMD(dst[start:end], dst[start:end], src[i][start:end], end-start)
+	}
+}
+
+// fastXORBytes XORs in bulk. It only works on architectures that
+// support unaligned reads/writes.
+func fastXORBytes(dst, a, b []byte, n int) {
+	w := n / wordSize
+	if w > 0 {
+		wordBytes := w * wordSize
+		fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
+	}
+	for i := n - n%wordSize; i < n; i++ {
+		dst[i] = a[i] ^ b[i]
+	}
+}
+
+func safeXORBytes(dst, a, b []byte, n int) {
+	ex := n % 8
+	for i := 0; i < ex; i++ {
+		dst[i] = a[i] ^ b[i]
+	}
+
+	for i := ex; i < n; i += 8 {
+		_dst := dst[i : i+8]
+		_a := a[i : i+8]
+		_b := b[i : i+8]
+		_dst[0] = _a[0] ^ _b[0]
+		_dst[1] = _a[1] ^ _b[1]
+		_dst[2] = _a[2] ^ _b[2]
+		_dst[3] = _a[3] ^ _b[3]
+
+		_dst[4] = _a[4] ^ _b[4]
+		_dst[5] = _a[5] ^ _b[5]
+		_dst[6] = _a[6] ^ _b[6]
+		_dst[7] = _a[7] ^ _b[7]
+	}
+}
+
+// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture).
+// The arguments are assumed to be of equal length.
+func fastXORWords(dst, a, b []byte) {
+	// Reinterpret the byte slices as word slices. The slice headers keep their
+	// byte counts, but only n = len(b)/wordSize words are indexed below.
+	dw := *(*[]uintptr)(unsafe.Pointer(&dst))
+	aw := *(*[]uintptr)(unsafe.Pointer(&a))
+	bw := *(*[]uintptr)(unsafe.Pointer(&b))
+	n := len(b) / wordSize
+	ex := n % 8
+	for i := 0; i < ex; i++ {
+		dw[i] = aw[i] ^ bw[i]
+	}
+
+	for i := ex; i < n; i += 8 {
+		_dw := dw[i : i+8]
+		_aw := aw[i : i+8]
+		_bw := bw[i : i+8]
+		_dw[0] = _aw[0] ^ _bw[0]
+		_dw[1] = _aw[1] ^ _bw[1]
+		_dw[2] = _aw[2] ^ _bw[2]
+		_dw[3] = _aw[3] ^ _bw[3]
+		_dw[4] = _aw[4] ^ _bw[4]
+		_dw[5] = _aw[5] ^ _bw[5]
+		_dw[6] = _aw[6] ^ _bw[6]
+		_dw[7] = _aw[7] ^ _bw[7]
+	}
+}

+ 574 - 0
vendor/github.com/templexxx/xor/sse2_amd64.s

@@ -0,0 +1,574 @@
+#include "textflag.h"
+
+// addr of mem
+#define DST BX
+#define SRC SI
+#define SRC0 TMP4
+#define SRC1 TMP5
+
+// loop args
+// num of vect
+#define VECT CX
+#define LEN DX
+// pos of matrix
+#define POS R8
+
+// tmp store
+// num of vect or ...
+#define TMP1 R9
+// pos of matrix or ...
+#define TMP2 R10
+// store addr of data/parity or ...
+#define TMP3 R11
+#define TMP4 R12
+#define TMP5 R13
+#define TMP6 R14
+
+// func xorSrc0(dst, src0, src1 []byte)
+TEXT ·xorSrc0(SB), NOSPLIT, $0
+	MOVQ  len+32(FP), LEN
+	CMPQ  LEN, $0
+	JE    ret
+	MOVQ  dst+0(FP), DST
+	MOVQ  src0+24(FP), SRC0
+	MOVQ  src1+48(FP), SRC1
+	TESTQ $15, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop16b:
+	MOVOU (SRC0)(POS*1), X0
+	XORPD (SRC1)(POS*1), X0
+	MOVOU X0, (DST)(POS*1)
+	ADDQ  $16, POS
+	CMPQ  LEN, POS
+	JNE   loop16b
+	RET
+
+loop_1b:
+	MOVB  -1(SRC0)(LEN*1), TMP1
+	MOVB  -1(SRC1)(LEN*1), TMP2
+	XORB  TMP1, TMP2
+	MOVB  TMP2, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $15, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP1
+	ANDQ  $15, TMP1
+
+loop_8b:
+	MOVQ -8(SRC0)(LEN*1), TMP2
+	MOVQ -8(SRC1)(LEN*1), TMP3
+	XORQ TMP2, TMP3
+	MOVQ TMP3, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP1
+	JG   loop_8b
+
+	CMPQ LEN, $16
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func xorSrc1(dst, src0, src1 []byte)
+TEXT ·xorSrc1(SB), NOSPLIT, $0
+	MOVQ  len+56(FP), LEN
+	CMPQ  LEN, $0
+	JE    ret
+	MOVQ  dst+0(FP), DST
+	MOVQ  src0+24(FP), SRC0
+	MOVQ  src1+48(FP), SRC1
+	TESTQ $15, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop16b:
+	MOVOU (SRC0)(POS*1), X0
+	XORPD (SRC1)(POS*1), X0
+	MOVOU X0, (DST)(POS*1)
+	ADDQ  $16, POS
+	CMPQ  LEN, POS
+	JNE   loop16b
+	RET
+
+loop_1b:
+	MOVB  -1(SRC0)(LEN*1), TMP1
+	MOVB  -1(SRC1)(LEN*1), TMP2
+	XORB  TMP1, TMP2
+	MOVB  TMP2, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $15, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP1
+	ANDQ  $15, TMP1
+
+loop_8b:
+	MOVQ -8(SRC0)(LEN*1), TMP2
+	MOVQ -8(SRC1)(LEN*1), TMP3
+	XORQ TMP2, TMP3
+	MOVQ TMP3, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP1
+	JG   loop_8b
+
+	CMPQ LEN, $16
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func bytesSSE2mini(dst, src0, src1 []byte, size int)
+TEXT ·bytesSSE2mini(SB), NOSPLIT, $0
+	MOVQ  len+72(FP), LEN
+	CMPQ  LEN, $0
+	JE    ret
+	MOVQ  dst+0(FP), DST
+	MOVQ  src0+24(FP), SRC0
+	MOVQ  src1+48(FP), SRC1
+	TESTQ $15, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop16b:
+	MOVOU (SRC0)(POS*1), X0
+	XORPD (SRC1)(POS*1), X0
+
+	// MOVOU (SRC1)(POS*1), X4
+	// PXOR X4, X0
+	MOVOU X0, (DST)(POS*1)
+	ADDQ  $16, POS
+	CMPQ  LEN, POS
+	JNE   loop16b
+	RET
+
+loop_1b:
+	MOVB  -1(SRC0)(LEN*1), TMP1
+	MOVB  -1(SRC1)(LEN*1), TMP2
+	XORB  TMP1, TMP2
+	MOVB  TMP2, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $15, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP1
+	ANDQ  $15, TMP1
+
+loop_8b:
+	MOVQ -8(SRC0)(LEN*1), TMP2
+	MOVQ -8(SRC1)(LEN*1), TMP3
+	XORQ TMP2, TMP3
+	MOVQ TMP3, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP1
+	JG   loop_8b
+
+	CMPQ LEN, $16
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func bytesSSE2small(dst, src0, src1 []byte, size int)
+TEXT ·bytesSSE2small(SB), NOSPLIT, $0
+	MOVQ  len+72(FP), LEN
+	CMPQ  LEN, $0
+	JE    ret
+	MOVQ  dst+0(FP), DST
+	MOVQ  src0+24(FP), SRC0
+	MOVQ  src1+48(FP), SRC1
+	TESTQ $63, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop64b:
+	MOVOU (SRC0)(POS*1), X0
+	MOVOU 16(SRC0)(POS*1), X1
+	MOVOU 32(SRC0)(POS*1), X2
+	MOVOU 48(SRC0)(POS*1), X3
+
+	MOVOU (SRC1)(POS*1), X4
+	MOVOU 16(SRC1)(POS*1), X5
+	MOVOU 32(SRC1)(POS*1), X6
+	MOVOU 48(SRC1)(POS*1), X7
+
+	PXOR X4, X0
+	PXOR X5, X1
+	PXOR X6, X2
+	PXOR X7, X3
+
+	MOVOU X0, (DST)(POS*1)
+	MOVOU X1, 16(DST)(POS*1)
+	MOVOU X2, 32(DST)(POS*1)
+	MOVOU X3, 48(DST)(POS*1)
+
+	ADDQ $64, POS
+	CMPQ LEN, POS
+	JNE  loop64b
+	RET
+
+loop_1b:
+	MOVB  -1(SRC0)(LEN*1), TMP1
+	MOVB  -1(SRC1)(LEN*1), TMP2
+	XORB  TMP1, TMP2
+	MOVB  TMP2, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $63, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP1
+	ANDQ  $63, TMP1
+
+loop_8b:
+	MOVQ -8(SRC0)(LEN*1), TMP2
+	MOVQ -8(SRC1)(LEN*1), TMP3
+	XORQ TMP2, TMP3
+	MOVQ TMP3, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP1
+	JG   loop_8b
+
+	CMPQ LEN, $64
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func bytesSSE2big(dst, src0, src1 []byte, size int)
+TEXT ·bytesSSE2big(SB), NOSPLIT, $0
+	MOVQ  len+72(FP), LEN
+	CMPQ  LEN, $0
+	JE    ret
+	MOVQ  dst+0(FP), DST
+	MOVQ  src0+24(FP), SRC0
+	MOVQ  src1+48(FP), SRC1
+	TESTQ $63, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop64b:
+	MOVOU (SRC0)(POS*1), X0
+	MOVOU 16(SRC0)(POS*1), X1
+	MOVOU 32(SRC0)(POS*1), X2
+	MOVOU 48(SRC0)(POS*1), X3
+
+	MOVOU (SRC1)(POS*1), X4
+	MOVOU 16(SRC1)(POS*1), X5
+	MOVOU 32(SRC1)(POS*1), X6
+	MOVOU 48(SRC1)(POS*1), X7
+
+	PXOR X4, X0
+	PXOR X5, X1
+	PXOR X6, X2
+	PXOR X7, X3
+
+	LONG $0xe70f4266; WORD $0x0304             // MOVNTDQ X0, (DST)(POS*1)
+	LONG $0xe70f4266; WORD $0x034c; BYTE $0x10 // MOVNTDQ X1, 16(DST)(POS*1)
+	LONG $0xe70f4266; WORD $0x0354; BYTE $0x20 // MOVNTDQ X2, 32(DST)(POS*1)
+	LONG $0xe70f4266; WORD $0x035c; BYTE $0x30 // MOVNTDQ X3, 48(DST)(POS*1)
+
+	ADDQ $64, POS
+	CMPQ LEN, POS
+	JNE  loop64b
+	RET
+
+loop_1b:
+	MOVB  -1(SRC0)(LEN*1), TMP1
+	MOVB  -1(SRC1)(LEN*1), TMP2
+	XORB  TMP1, TMP2
+	MOVB  TMP2, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $63, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP1
+	ANDQ  $63, TMP1
+
+loop_8b:
+	MOVQ -8(SRC0)(LEN*1), TMP2
+	MOVQ -8(SRC1)(LEN*1), TMP3
+	XORQ TMP2, TMP3
+	MOVQ TMP3, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP1
+	JG   loop_8b
+
+	CMPQ LEN, $64
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func matrixSSE2small(dst []byte, src [][]byte)
+TEXT ·matrixSSE2small(SB), NOSPLIT, $0
+	MOVQ  dst+0(FP), DST
+	MOVQ  src+24(FP), SRC
+	MOVQ  vec+32(FP), VECT
+	MOVQ  len+8(FP), LEN
+	TESTQ $63, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop64b:
+	MOVQ  VECT, TMP1
+	SUBQ  $2, TMP1
+	MOVQ  $0, TMP2
+	MOVQ  (SRC)(TMP2*1), TMP3
+	MOVQ  TMP3, TMP4
+	MOVOU (TMP3)(POS*1), X0
+	MOVOU 16(TMP4)(POS*1), X1
+	MOVOU 32(TMP3)(POS*1), X2
+	MOVOU 48(TMP4)(POS*1), X3
+
+next_vect:
+	ADDQ  $24, TMP2
+	MOVQ  (SRC)(TMP2*1), TMP3
+	MOVQ  TMP3, TMP4
+	MOVOU (TMP3)(POS*1), X4
+	MOVOU 16(TMP4)(POS*1), X5
+	MOVOU 32(TMP3)(POS*1), X6
+	MOVOU 48(TMP4)(POS*1), X7
+	PXOR  X4, X0
+	PXOR  X5, X1
+	PXOR  X6, X2
+	PXOR  X7, X3
+	SUBQ  $1, TMP1
+	JGE   next_vect
+
+	MOVOU X0, (DST)(POS*1)
+	MOVOU X1, 16(DST)(POS*1)
+	MOVOU X2, 32(DST)(POS*1)
+	MOVOU X3, 48(DST)(POS*1)
+
+	ADDQ $64, POS
+	CMPQ LEN, POS
+	JNE  loop64b
+	RET
+
+loop_1b:
+	MOVQ VECT, TMP1
+	MOVQ $0, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	SUBQ $2, TMP1
+	MOVB -1(TMP3)(LEN*1), TMP5
+
+next_vect_1b:
+	ADDQ $24, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	MOVB -1(TMP3)(LEN*1), TMP6
+	XORB TMP6, TMP5
+	SUBQ $1, TMP1
+	JGE  next_vect_1b
+
+	MOVB  TMP5, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $63, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP4
+	ANDQ  $63, TMP4
+
+loop_8b:
+	MOVQ VECT, TMP1
+	MOVQ $0, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	SUBQ $2, TMP1
+	MOVQ -8(TMP3)(LEN*1), TMP5
+
+next_vect_8b:
+	ADDQ $24, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	MOVQ -8(TMP3)(LEN*1), TMP6
+	XORQ TMP6, TMP5
+	SUBQ $1, TMP1
+	JGE  next_vect_8b
+
+	MOVQ TMP5, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP4
+	JG   loop_8b
+
+	CMPQ LEN, $64
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func matrixSSE2big(dst []byte, src [][]byte)
+TEXT ·matrixSSE2big(SB), NOSPLIT, $0
+	MOVQ  dst+0(FP), DST
+	MOVQ  src+24(FP), SRC
+	MOVQ  vec+32(FP), VECT
+	MOVQ  len+8(FP), LEN
+	TESTQ $63, LEN
+	JNZ   not_aligned
+
+aligned:
+	MOVQ $0, POS
+
+loop64b:
+	MOVQ  VECT, TMP1
+	SUBQ  $2, TMP1
+	MOVQ  $0, TMP2
+	MOVQ  (SRC)(TMP2*1), TMP3
+	MOVQ  TMP3, TMP4
+	MOVOU (TMP3)(POS*1), X0
+	MOVOU 16(TMP4)(POS*1), X1
+	MOVOU 32(TMP3)(POS*1), X2
+	MOVOU 48(TMP4)(POS*1), X3
+
+next_vect:
+	ADDQ  $24, TMP2
+	MOVQ  (SRC)(TMP2*1), TMP3
+	MOVQ  TMP3, TMP4
+	MOVOU (TMP3)(POS*1), X4
+	MOVOU 16(TMP4)(POS*1), X5
+	MOVOU 32(TMP3)(POS*1), X6
+	MOVOU 48(TMP4)(POS*1), X7
+	PXOR  X4, X0
+	PXOR  X5, X1
+	PXOR  X6, X2
+	PXOR  X7, X3
+	SUBQ  $1, TMP1
+	JGE   next_vect
+
+	LONG $0xe70f4266; WORD $0x0304             // MOVNTDQ X0, (DST)(POS*1)
+	LONG $0xe70f4266; WORD $0x034c; BYTE $0x10 // MOVNTDQ X1, 16(DST)(POS*1)
+	LONG $0xe70f4266; WORD $0x0354; BYTE $0x20 // MOVNTDQ X2, 32(DST)(POS*1)
+	LONG $0xe70f4266; WORD $0x035c; BYTE $0x30 // MOVNTDQ X3, 48(DST)(POS*1)
+
+	ADDQ $64, POS
+	CMPQ LEN, POS
+	JNE  loop64b
+	RET
+
+loop_1b:
+	MOVQ VECT, TMP1
+	MOVQ $0, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	SUBQ $2, TMP1
+	MOVB -1(TMP3)(LEN*1), TMP5
+
+next_vect_1b:
+	ADDQ $24, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	MOVB -1(TMP3)(LEN*1), TMP6
+	XORB TMP6, TMP5
+	SUBQ $1, TMP1
+	JGE  next_vect_1b
+
+	MOVB  TMP5, -1(DST)(LEN*1)
+	SUBQ  $1, LEN
+	TESTQ $7, LEN
+	JNZ   loop_1b
+
+	CMPQ  LEN, $0
+	JE    ret
+	TESTQ $63, LEN
+	JZ    aligned
+
+not_aligned:
+	TESTQ $7, LEN
+	JNE   loop_1b
+	MOVQ  LEN, TMP4
+	ANDQ  $63, TMP4
+
+loop_8b:
+	MOVQ VECT, TMP1
+	MOVQ $0, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	SUBQ $2, TMP1
+	MOVQ -8(TMP3)(LEN*1), TMP5
+
+next_vect_8b:
+	ADDQ $24, TMP2
+	MOVQ (SRC)(TMP2*1), TMP3
+	MOVQ -8(TMP3)(LEN*1), TMP6
+	XORQ TMP6, TMP5
+	SUBQ $1, TMP1
+	JGE  next_vect_8b
+
+	MOVQ TMP5, -8(DST)(LEN*1)
+	SUBQ $8, LEN
+	SUBQ $8, TMP4
+	JG   loop_8b
+
+	CMPQ LEN, $64
+	JGE  aligned
+	RET
+
+ret:
+	RET
+
+// func hasSSE2() bool
+TEXT ·hasSSE2(SB), NOSPLIT, $0
+	XORQ AX, AX
+	INCL AX             // CPUID leaf 1
+	CPUID
+	SHRQ $26, DX
+	ANDQ $1, DX         // SSE2 feature flag is CPUID.01H:EDX bit 26
+	MOVB DX, ret+0(FP)
+	RET
+

+ 49 - 0
vendor/github.com/templexxx/xor/xor.go

@@ -0,0 +1,49 @@
+package xor
+
+// SIMD Extensions
+const (
+	none = iota
+	avx2
+	// SSE2 was first introduced by Intel with the initial Pentium 4 in 2000;
+	// it is part of the amd64 baseline, so all amd64 CPUs can be assumed to have it
+	sse2
+)
+
+var extension = none
+
+// Bytes : chooses the shortest of the three lengths as the XOR size;
+// it's best used for big data (> 64 bytes)
+func Bytes(dst, src0, src1 []byte) {
+	size := len(dst)
+	if size > len(src0) {
+		size = len(src0)
+	}
+	if size > len(src1) {
+		size = len(src1)
+	}
+	xorBytes(dst, src0, src1, size)
+}
+
+// BytesSameLen : all slices' lengths must be equal;
+// cutting the size branch saves time for small data
+func BytesSameLen(dst, src0, src1 []byte) {
+	xorSrc1(dst, src0, src1)
+}
+
+// BytesSrc0 : requires len(src1) >= len(src0) and len(dst) >= len(src0);
+// XORs len(src0) bytes
+func BytesSrc0(dst, src0, src1 []byte) {
+	xorSrc0(dst, src0, src1)
+}
+
+// BytesSrc1 : requires len(src0) >= len(src1) and len(dst) >= len(src1);
+// XORs len(src1) bytes
+func BytesSrc1(dst, src0, src1 []byte) {
+	xorSrc1(dst, src0, src1)
+}
+
+// Matrix : all slices' lengths must be equal and non-zero;
+// len(src) must be >= 2
+func Matrix(dst []byte, src [][]byte) {
+	xorMatrix(dst, src)
+}

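A short usage sketch of the public API above (the byte values are illustrative; the expected output follows from the XOR arithmetic):

```go
package main

import (
	"fmt"

	"github.com/templexxx/xor"
)

func main() {
	a := []byte{0x01, 0x02, 0x03, 0x04}
	b := []byte{0xff, 0x0f, 0xf0, 0x00}

	dst := make([]byte, 4)
	xor.Bytes(dst, a, b)    // uses the shortest length as the XOR size
	fmt.Printf("%x\n", dst) // fe0df304

	out := make([]byte, 4)
	xor.Matrix(out, [][]byte{a, b, dst}) // XORs two or more equal-length sources
	fmt.Printf("%x\n", out)              // 00000000, since a ^ b ^ (a^b) = 0
}
```
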
+ 120 - 0
vendor/github.com/templexxx/xor/xor_amd64.go

@@ -0,0 +1,120 @@
+package xor
+
+import "github.com/templexxx/cpufeat"
+
+func init() {
+	getEXT()
+}
+
+func getEXT() {
+	if cpufeat.X86.HasAVX2 {
+		extension = avx2
+	} else {
+		extension = sse2
+	}
+}
+
+func xorBytes(dst, src0, src1 []byte, size int) {
+	switch extension {
+	case avx2:
+		bytesAVX2(dst, src0, src1, size)
+	default:
+		bytesSSE2(dst, src0, src1, size)
+	}
+}
+
+// nontmp is the size threshold (in bytes) above which non-temporal stores are used
+const nontmp = 8 * 1024
+const avx2loopsize = 128
+
+func bytesAVX2(dst, src0, src1 []byte, size int) {
+	if size < avx2loopsize {
+		bytesAVX2mini(dst, src0, src1, size)
+	} else if size >= avx2loopsize && size <= nontmp {
+		bytesAVX2small(dst, src0, src1, size)
+	} else {
+		bytesAVX2big(dst, src0, src1, size)
+	}
+}
+
+const sse2loopsize = 64
+
+func bytesSSE2(dst, src0, src1 []byte, size int) {
+	if size < sse2loopsize {
+		bytesSSE2mini(dst, src0, src1, size)
+	} else if size >= sse2loopsize && size <= nontmp {
+		bytesSSE2small(dst, src0, src1, size)
+	} else {
+		bytesSSE2big(dst, src0, src1, size)
+	}
+}
+
+func xorMatrix(dst []byte, src [][]byte) {
+	switch extension {
+	case avx2:
+		matrixAVX2(dst, src)
+	default:
+		matrixSSE2(dst, src)
+	}
+}
+
+func matrixAVX2(dst []byte, src [][]byte) {
+	size := len(dst)
+	if size > nontmp {
+		matrixAVX2big(dst, src)
+	} else {
+		matrixAVX2small(dst, src)
+	}
+}
+
+func matrixSSE2(dst []byte, src [][]byte) {
+	size := len(dst)
+	if size > nontmp {
+		matrixSSE2big(dst, src)
+	} else {
+		matrixSSE2small(dst, src)
+	}
+}
+
+//go:noescape
+func xorSrc0(dst, src0, src1 []byte)
+
+//go:noescape
+func xorSrc1(dst, src0, src1 []byte)
+
+//go:noescape
+func bytesAVX2mini(dst, src0, src1 []byte, size int)
+
+//go:noescape
+func bytesAVX2big(dst, src0, src1 []byte, size int)
+
+//go:noescape
+func bytesAVX2small(dst, src0, src1 []byte, size int)
+
+//go:noescape
+func bytesSSE2mini(dst, src0, src1 []byte, size int)
+
+//go:noescape
+func bytesSSE2small(dst, src0, src1 []byte, size int)
+
+//go:noescape
+func bytesSSE2big(dst, src0, src1 []byte, size int)
+
+//go:noescape
+func matrixAVX2small(dst []byte, src [][]byte)
+
+//go:noescape
+func matrixAVX2big(dst []byte, src [][]byte)
+
+//go:noescape
+func matrixSSE2small(dst []byte, src [][]byte)
+
+//go:noescape
+func matrixSSE2big(dst []byte, src [][]byte)
+
+//go:noescape
+func hasAVX2() bool
+
+//go:noescape
+func hasSSE2() bool

+ 19 - 0
vendor/github.com/templexxx/xor/xor_other.go

@@ -0,0 +1,19 @@
+// +build !amd64 noasm
+
+package xor
+
+func xorBytes(dst, src0, src1 []byte, size int) {
+	bytesNoSIMD(dst, src0, src1, size)
+}
+
+func xorMatrix(dst []byte, src [][]byte) {
+	matrixNoSIMD(dst, src)
+}
+
+func xorSrc0(dst, src0, src1 []byte) {
+	bytesNoSIMD(dst, src0, src1, len(src0))
+}
+
+func xorSrc1(dst, src0, src1 []byte) {
+	bytesNoSIMD(dst, src0, src1, len(src1))
+}

+ 201 - 0
vendor/github.com/tjfoc/gmsm/LICENSE

@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright {yyyy} {name of copyright owner}
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 291 - 0
vendor/github.com/tjfoc/gmsm/sm4/sm4.go

@@ -0,0 +1,291 @@
+/*
+Copyright Suzhou Tongji Fintech Research Institute 2017 All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+                 http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package sm4
+
+import (
+	"crypto/cipher"
+	"crypto/rand"
+	"crypto/x509"
+	"encoding/pem"
+	"errors"
+	"io/ioutil"
+	"os"
+	"strconv"
+)
+
+// BlockSize is the SM4 block size in bytes.
+const BlockSize = 16
+
+type SM4Key []byte
+
+type KeySizeError int
+
+// Sm4Cipher is an instance of SM4 encryption.
+type Sm4Cipher struct {
+	subkeys []uint32
+	block1  []uint32
+	block2  []byte
+}
+
+// SM4 key schedule constants FK
+var fk = [4]uint32{
+	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc,
+}
+
+// SM4 key schedule constants CK
+var ck = [32]uint32{
+	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279,
+}
+
+// SM4 S-box
+var sbox = [256]uint8{
+	0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+	0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+	0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+	0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+	0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+	0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+	0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+	0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+	0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+	0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+	0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+	0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+	0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+	0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+	0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+	0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
+}
+
+func rl(x uint32, i uint8) uint32 { return (x << (i % 32)) | (x >> (32 - (i % 32))) }
+
+func l0(b uint32) uint32 { return b ^ rl(b, 13) ^ rl(b, 23) }
+
+func l1(b uint32) uint32 { return b ^ rl(b, 2) ^ rl(b, 10) ^ rl(b, 18) ^ rl(b, 24) }
+
+func feistel0(x0, x1, x2, x3, rk uint32) uint32 { return x0 ^ l0(p(x1^x2^x3^rk)) }
+
+func feistel1(x0, x1, x2, x3, rk uint32) uint32 { return x0 ^ l1(p(x1^x2^x3^rk)) }
+
+// non-linear transformation τ(.)
+func p(a uint32) uint32 {
+	return (uint32(sbox[a>>24]) << 24) ^ (uint32(sbox[(a>>16)&0xff]) << 16) ^ (uint32(sbox[(a>>8)&0xff]) << 8) ^ uint32(sbox[(a)&0xff])
+}
+
+/*
+func permuteInitialBlock(block []byte) []uint32 {
+	b := make([]uint32, 4, 4)
+	for i := 0; i < 4; i++ {
+		b[i] = (uint32(block[i*4]) << 24) | (uint32(block[i*4+1]) << 16) |
+			(uint32(block[i*4+2]) << 8) | (uint32(block[i*4+3]))
+	}
+	return b
+}
+
+func permuteFinalBlock(block []uint32) []byte {
+	b := make([]byte, 16, 16)
+	for i := 0; i < 4; i++ {
+		b[i*4] = uint8(block[i] >> 24)
+		b[i*4+1] = uint8(block[i] >> 16)
+		b[i*4+2] = uint8(block[i] >> 8)
+		b[i*4+3] = uint8(block[i])
+	}
+	return b
+}
+
+func cryptBlock(subkeys []uint32, dst, src []byte, decrypt bool) {
+	var tm uint32
+	b := permuteInitialBlock(src)
+	for i := 0; i < 32; i++ {
+		if decrypt {
+			tm = feistel1(b[0], b[1], b[2], b[3], subkeys[31-i])
+		} else {
+			tm = feistel1(b[0], b[1], b[2], b[3], subkeys[i])
+		}
+		b[0], b[1], b[2], b[3] = b[1], b[2], b[3], tm
+	}
+	b[0], b[1], b[2], b[3] = b[3], b[2], b[1], b[0]
+	copy(dst, permuteFinalBlock(b))
+}
+*/
+
+func permuteInitialBlock(b []uint32, block []byte) {
+	for i := 0; i < 4; i++ {
+		b[i] = (uint32(block[i*4]) << 24) | (uint32(block[i*4+1]) << 16) |
+			(uint32(block[i*4+2]) << 8) | (uint32(block[i*4+3]))
+	}
+}
+
+func permuteFinalBlock(b []byte, block []uint32) {
+	for i := 0; i < 4; i++ {
+		b[i*4] = uint8(block[i] >> 24)
+		b[i*4+1] = uint8(block[i] >> 16)
+		b[i*4+2] = uint8(block[i] >> 8)
+		b[i*4+3] = uint8(block[i])
+	}
+}
+func cryptBlock(subkeys []uint32, b []uint32, r []byte, dst, src []byte, decrypt bool) {
+	var tm uint32
+
+	permuteInitialBlock(b, src)
+	for i := 0; i < 32; i++ {
+		if decrypt {
+			tm = b[0] ^ l1(p(b[1]^b[2]^b[3]^subkeys[31 - i]))
+			//			tm = feistel1(b[0], b[1], b[2], b[3], subkeys[31-i])
+		} else {
+			tm = b[0] ^ l1(p(b[1]^b[2]^b[3]^subkeys[i]))
+			//	tm = feistel1(b[0], b[1], b[2], b[3], subkeys[i])
+		}
+		b[0], b[1], b[2], b[3] = b[1], b[2], b[3], tm
+	}
+	b[0], b[1], b[2], b[3] = b[3], b[2], b[1], b[0]
+	permuteFinalBlock(r, b)
+	copy(dst, r)
+}
+
+func generateSubKeys(key []byte) []uint32 {
+	subkeys := make([]uint32, 32)
+	b := make([]uint32, 4)
+	//	b := permuteInitialBlock(key)
+	permuteInitialBlock(b, key)
+	b[0] ^= fk[0]
+	b[1] ^= fk[1]
+	b[2] ^= fk[2]
+	b[3] ^= fk[3]
+	for i := 0; i < 32; i++ {
+		subkeys[i] = feistel0(b[0], b[1], b[2], b[3], ck[i])
+		b[0], b[1], b[2], b[3] = b[1], b[2], b[3], subkeys[i]
+	}
+	return subkeys
+}
+
+// EncryptBlock encrypts a single 16-byte block from src into dst.
+func EncryptBlock(key SM4Key, dst, src []byte) {
+	subkeys := generateSubKeys(key)
+	cryptBlock(subkeys, make([]uint32, 4), make([]byte, 16), dst, src, false)
+}
+
+// DecryptBlock decrypts a single 16-byte block from src into dst.
+func DecryptBlock(key SM4Key, dst, src []byte) {
+	subkeys := generateSubKeys(key)
+	cryptBlock(subkeys, make([]uint32, 4), make([]byte, 16), dst, src, true)
+}
+
+func ReadKeyFromMem(data []byte, pwd []byte) (SM4Key, error) {
+	block, _ := pem.Decode(data)
+	if block == nil {
+		return nil, errors.New("SM4: failed to decode PEM block")
+	}
+	if x509.IsEncryptedPEMBlock(block) {
+		if block.Type != "SM4 ENCRYPTED KEY" {
+			return nil, errors.New("SM4: unknown type")
+		}
+		if pwd == nil {
+			return nil, errors.New("SM4: need passwd")
+		}
+		data, err := x509.DecryptPEMBlock(block, pwd)
+		if err != nil {
+			return nil, err
+		}
+		return data, nil
+	}
+	if block.Type != "SM4 KEY" {
+		return nil, errors.New("SM4: unknown type")
+	}
+	return block.Bytes, nil
+}
+
+func ReadKeyFromPem(FileName string, pwd []byte) (SM4Key, error) {
+	data, err := ioutil.ReadFile(FileName)
+	if err != nil {
+		return nil, err
+	}
+	return ReadKeyFromMem(data, pwd)
+}
+
+func WriteKeytoMem(key SM4Key, pwd []byte) ([]byte, error) {
+	if pwd != nil {
+		block, err := x509.EncryptPEMBlock(rand.Reader,
+			"SM4 ENCRYPTED KEY", key, pwd, x509.PEMCipherAES256)
+		if err != nil {
+			return nil, err
+		}
+		return pem.EncodeToMemory(block), nil
+	} else {
+		block := &pem.Block{
+			Type:  "SM4 KEY",
+			Bytes: key,
+		}
+		return pem.EncodeToMemory(block), nil
+	}
+}
+
+func WriteKeyToPem(FileName string, key SM4Key, pwd []byte) (bool, error) {
+	var block *pem.Block
+
+	if pwd != nil {
+		var err error
+		block, err = x509.EncryptPEMBlock(rand.Reader,
+			"SM4 ENCRYPTED KEY", key, pwd, x509.PEMCipherAES256)
+		if err != nil {
+			return false, err
+		}
+	} else {
+		block = &pem.Block{
+			Type:  "SM4 KEY",
+			Bytes: key,
+		}
+	}
+	file, err := os.Create(FileName)
+	if err != nil {
+		return false, err
+	}
+	defer file.Close()
+	err = pem.Encode(file, block)
+	if err != nil {
+		return false, err
+	}
+	return true, nil
+}
+
+func (k KeySizeError) Error() string {
+	return "SM4: invalid key size " + strconv.Itoa(int(k))
+}
+
+// NewCipher creates and returns a new cipher.Block.
+func NewCipher(key []byte) (cipher.Block, error) {
+	if len(key) != BlockSize {
+		return nil, KeySizeError(len(key))
+	}
+	c := new(Sm4Cipher)
+	c.subkeys = generateSubKeys(key)
+	c.block1 = make([]uint32, 4)
+	c.block2 = make([]byte, 16)
+	return c, nil
+}
+
+func (c *Sm4Cipher) BlockSize() int {
+	return BlockSize
+}
+
+func (c *Sm4Cipher) Encrypt(dst, src []byte) {
+	cryptBlock(c.subkeys, c.block1, c.block2, dst, src, false)
+}
+
+func (c *Sm4Cipher) Decrypt(dst, src []byte) {
+	cryptBlock(c.subkeys, c.block1, c.block2, dst, src, true)
+}

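A minimal usage sketch for the block cipher above, using only the APIs shown in this file (the key and plaintext are illustrative 16-byte values):

```go
package main

import (
	"bytes"
	"fmt"

	"github.com/tjfoc/gmsm/sm4"
)

func main() {
	key := []byte("0123456789abcdef") // BlockSize (16) bytes
	src := []byte("hello, sm4 block") // exactly one block
	dst := make([]byte, sm4.BlockSize)
	dec := make([]byte, sm4.BlockSize)

	c, err := sm4.NewCipher(key)
	if err != nil {
		panic(err)
	}
	c.Encrypt(dst, src) // encrypt a single block
	c.Decrypt(dec, dst)
	fmt.Println(bytes.Equal(src, dec)) // true
}
```
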
Some files were not shown because too many files changed in this diff