update kcp-go package

fatedier committed 6 years ago
commit fdcdccb0c2
100 changed files with 10618 additions and 2456 deletions
  1. +3 -2  go.mod
  2. +6 -2  go.sum
  3. +3 -1  vendor/github.com/fatedier/kcp-go/.travis.yml
  4. +55 -43  vendor/github.com/fatedier/kcp-go/README.md
  5. +549 -52  vendor/github.com/fatedier/kcp-go/crypt.go
  6. +52 -0  vendor/github.com/fatedier/kcp-go/entropy.go
  7. +49 -41  vendor/github.com/fatedier/kcp-go/fec.go
  8. +98 -84  vendor/github.com/fatedier/kcp-go/kcp.go
  9. +239 -229  vendor/github.com/fatedier/kcp-go/sess.go
  10. +6 -7  vendor/github.com/fatedier/kcp-go/updater.go
  11. +0 -110  vendor/github.com/fatedier/kcp-go/xor.go
  12. +0 -16  vendor/github.com/klauspost/cpuid/.gitignore
  13. +23 -0  vendor/github.com/klauspost/cpuid/.travis.yml
  14. +35 -0  vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
  15. +22 -0  vendor/github.com/klauspost/cpuid/LICENSE
  16. +145 -0  vendor/github.com/klauspost/cpuid/README.md
  17. +1040 -0  vendor/github.com/klauspost/cpuid/cpuid.go
  18. +42 -0  vendor/github.com/klauspost/cpuid/cpuid_386.s
  19. +42 -0  vendor/github.com/klauspost/cpuid/cpuid_amd64.s
  20. +17 -0  vendor/github.com/klauspost/cpuid/detect_intel.go
  21. +23 -0  vendor/github.com/klauspost/cpuid/detect_ref.go
  22. +4 -0  vendor/github.com/klauspost/cpuid/generate.go
  23. +476 -0  vendor/github.com/klauspost/cpuid/private-gen.go
  24. +26 -0  vendor/github.com/klauspost/reedsolomon/.gitignore
  25. +33 -0  vendor/github.com/klauspost/reedsolomon/.travis.yml
  26. +2 -2  vendor/github.com/klauspost/reedsolomon/LICENSE
  27. +321 -0  vendor/github.com/klauspost/reedsolomon/README.md
  28. +20 -0  vendor/github.com/klauspost/reedsolomon/appveyor.yml
  29. +65 -0  vendor/github.com/klauspost/reedsolomon/galois.go
  30. +184 -0  vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
  31. +590 -0  vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
  32. +92 -0  vendor/github.com/klauspost/reedsolomon/galois_amd64.go
  33. +236 -0  vendor/github.com/klauspost/reedsolomon/galois_amd64.s
  34. +52 -0  vendor/github.com/klauspost/reedsolomon/galois_arm64.go
  35. +141 -0  vendor/github.com/klauspost/reedsolomon/galois_arm64.s
  36. +34 -0  vendor/github.com/klauspost/reedsolomon/galois_noasm.go
  37. +70 -0  vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
  38. +126 -0  vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
  39. +132 -0  vendor/github.com/klauspost/reedsolomon/gentables.go
  40. +160 -0  vendor/github.com/klauspost/reedsolomon/inversion_tree.go
  41. +279 -0  vendor/github.com/klauspost/reedsolomon/matrix.go
  42. +118 -0  vendor/github.com/klauspost/reedsolomon/options.go
  43. +887 -0  vendor/github.com/klauspost/reedsolomon/reedsolomon.go
  44. +584 -0  vendor/github.com/klauspost/reedsolomon/streaming.go
  45. +0 -9  vendor/github.com/templexxx/reedsolomon/.travis.yml
  46. +0 -108  vendor/github.com/templexxx/reedsolomon/README.md
  47. +0 -156  vendor/github.com/templexxx/reedsolomon/matrix.go
  48. +0 -280  vendor/github.com/templexxx/reedsolomon/rs.go
  49. +0 -868  vendor/github.com/templexxx/reedsolomon/rs_amd64.go
  50. +0 -401  vendor/github.com/templexxx/reedsolomon/rs_amd64.s
  51. +0 -8  vendor/github.com/templexxx/reedsolomon/rs_other.go
  52. +0 -37  vendor/github.com/templexxx/reedsolomon/tbl.go
  53. +119 -0  vendor/golang.org/x/net/ipv6/batch.go
  54. +187 -0  vendor/golang.org/x/net/ipv6/control.go
  55. +48 -0  vendor/golang.org/x/net/ipv6/control_rfc2292_unix.go
  56. +94 -0  vendor/golang.org/x/net/ipv6/control_rfc3542_unix.go
  57. +13 -0  vendor/golang.org/x/net/ipv6/control_stub.go
  58. +55 -0  vendor/golang.org/x/net/ipv6/control_unix.go
  59. +16 -0  vendor/golang.org/x/net/ipv6/control_windows.go
  60. +112 -0  vendor/golang.org/x/net/ipv6/defs_darwin.go
  61. +84 -0  vendor/golang.org/x/net/ipv6/defs_dragonfly.go
  62. +105 -0  vendor/golang.org/x/net/ipv6/defs_freebsd.go
  63. +147 -0  vendor/golang.org/x/net/ipv6/defs_linux.go
  64. +80 -0  vendor/golang.org/x/net/ipv6/defs_netbsd.go
  65. +89 -0  vendor/golang.org/x/net/ipv6/defs_openbsd.go
  66. +114 -0  vendor/golang.org/x/net/ipv6/defs_solaris.go
  67. +302 -0  vendor/golang.org/x/net/ipv6/dgramopt.go
  68. +243 -0  vendor/golang.org/x/net/ipv6/doc.go
  69. +128 -0  vendor/golang.org/x/net/ipv6/endpoint.go
  70. +199 -0  vendor/golang.org/x/net/ipv6/gen.go
  71. +58 -0  vendor/golang.org/x/net/ipv6/genericopt.go
  72. +55 -0  vendor/golang.org/x/net/ipv6/header.go
  73. +57 -0  vendor/golang.org/x/net/ipv6/helper.go
  74. +86 -0  vendor/golang.org/x/net/ipv6/iana.go
  75. +60 -0  vendor/golang.org/x/net/ipv6/icmp.go
  76. +29 -0  vendor/golang.org/x/net/ipv6/icmp_bsd.go
  77. +27 -0  vendor/golang.org/x/net/ipv6/icmp_linux.go
  78. +27 -0  vendor/golang.org/x/net/ipv6/icmp_solaris.go
  79. +23 -0  vendor/golang.org/x/net/ipv6/icmp_stub.go
  80. +22 -0  vendor/golang.org/x/net/ipv6/icmp_windows.go
  81. +23 -0  vendor/golang.org/x/net/ipv6/payload.go
  82. +35 -0  vendor/golang.org/x/net/ipv6/payload_cmsg.go
  83. +55 -0  vendor/golang.org/x/net/ipv6/payload_cmsg_go1_8.go
  84. +57 -0  vendor/golang.org/x/net/ipv6/payload_cmsg_go1_9.go
  85. +41 -0  vendor/golang.org/x/net/ipv6/payload_nocmsg.go
  86. +43 -0  vendor/golang.org/x/net/ipv6/sockopt.go
  87. +87 -0  vendor/golang.org/x/net/ipv6/sockopt_posix.go
  88. +46 -0  vendor/golang.org/x/net/ipv6/sockopt_stub.go
  89. +24 -0  vendor/golang.org/x/net/ipv6/sys_asmreq.go
  90. +17 -0  vendor/golang.org/x/net/ipv6/sys_asmreq_stub.go
  91. +23 -0  vendor/golang.org/x/net/ipv6/sys_bpf.go
  92. +16 -0  vendor/golang.org/x/net/ipv6/sys_bpf_stub.go
  93. +57 -0  vendor/golang.org/x/net/ipv6/sys_bsd.go
  94. +106 -0  vendor/golang.org/x/net/ipv6/sys_darwin.go
  95. +92 -0  vendor/golang.org/x/net/ipv6/sys_freebsd.go
  96. +74 -0  vendor/golang.org/x/net/ipv6/sys_linux.go
  97. +74 -0  vendor/golang.org/x/net/ipv6/sys_solaris.go
  98. +54 -0  vendor/golang.org/x/net/ipv6/sys_ssmreq.go
  99. +21 -0  vendor/golang.org/x/net/ipv6/sys_ssmreq_stub.go
  100. +13 -0  vendor/golang.org/x/net/ipv6/sys_stub.go

+ 3 - 2
go.mod

@@ -7,13 +7,15 @@ require (
 	github.com/davecgh/go-spew v1.1.0 // indirect
 	github.com/fatedier/beego v0.0.0-20171024143340-6c6a4f5bd5eb
 	github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049
-	github.com/fatedier/kcp-go v0.0.0-20171023144637-cd167d2f15f4
+	github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible
 	github.com/golang/snappy v0.0.0-20170215233205-553a64147049 // indirect
 	github.com/gorilla/context v1.1.1 // indirect
 	github.com/gorilla/mux v1.6.2
 	github.com/gorilla/websocket v1.2.0
 	github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d
 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
+	github.com/klauspost/cpuid v1.2.0 // indirect
+	github.com/klauspost/reedsolomon v1.9.1 // indirect
 	github.com/mattn/go-runewidth v0.0.4 // indirect
 	github.com/pkg/errors v0.8.0 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
@@ -23,7 +25,6 @@ require (
 	github.com/spf13/pflag v1.0.1 // indirect
 	github.com/stretchr/testify v1.2.1
 	github.com/templexxx/cpufeat v0.0.0-20170927014610-3794dfbfb047 // indirect
-	github.com/templexxx/reedsolomon v0.0.0-20170926020725-5e06b81a1c76 // indirect
 	github.com/templexxx/xor v0.0.0-20170926022130-0af8e873c554 // indirect
 	github.com/tjfoc/gmsm v0.0.0-20171124023159-98aa888b79d8 // indirect
 	github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec

+ 6 - 2
go.sum

@@ -3,7 +3,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
 github.com/fatedier/beego v0.0.0-20171024143340-6c6a4f5bd5eb/go.mod h1:wx3gB6dbIfBRcucp94PI9Bt3I0F2c/MyNEWuhzpWiwk=
 github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049 h1:teH578mf2ii42NHhIp3PhgvjU5bv+NFMq9fSQR8NaG8=
 github.com/fatedier/golib v0.0.0-20181107124048-ff8cd814b049/go.mod h1:DqIrnl0rp3Zybg9zbJmozTy1n8fYJoX+QoAj9slIkKM=
-github.com/fatedier/kcp-go v0.0.0-20171023144637-cd167d2f15f4/go.mod h1:YpCOaxj7vvMThhIQ9AfTOPW2sfztQR5WDfs7AflSy4s=
+github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible h1:pNNeBKz1jtMDupiwvtEGFTujA3J86xoEXGSkwVeYFsw=
+github.com/fatedier/kcp-go v2.0.4-0.20190317085623-2063a803e6fe+incompatible/go.mod h1:YpCOaxj7vvMThhIQ9AfTOPW2sfztQR5WDfs7AflSy4s=
 github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
 github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8=
 github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
@@ -13,6 +14,10 @@ github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d h1:kJCB4vdITiW1eC1
 github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=
 github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
 github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
+github.com/klauspost/cpuid v1.2.0 h1:NMpwD2G9JSFOE1/TJjGSo5zG7Yb2bTe7eq1jH+irmeE=
+github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
+github.com/klauspost/reedsolomon v1.9.1 h1:kYrT1MlR4JH6PqOpC+okdb9CDTcwEC/BqpzK4WFyXL8=
+github.com/klauspost/reedsolomon v1.9.1/go.mod h1:CwCi+NUr9pqSVktrkN+Ondf06rkhYZ/pcNv7fu+8Un4=
 github.com/mattn/go-runewidth v0.0.4 h1:2BvfKmzob6Bmd4YsL0zygOqfdFnK7GR4QL06Do4/p7Y=
 github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
 github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -23,7 +28,6 @@ github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3
 github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
 github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/templexxx/cpufeat v0.0.0-20170927014610-3794dfbfb047/go.mod h1:wM7WEvslTq+iOEAMDLSzhVuOt5BRZ05WirO+b09GHQU=
-github.com/templexxx/reedsolomon v0.0.0-20170926020725-5e06b81a1c76/go.mod h1:ToWcj2sZ6xHl14JjZiVDktYpFtrFZJXBlsu7TV23lNg=
 github.com/templexxx/xor v0.0.0-20170926022130-0af8e873c554/go.mod h1:5XA7W9S6mni3h5uvOC75dA3m9CCCaS83lltmc0ukdi4=
 github.com/tjfoc/gmsm v0.0.0-20171124023159-98aa888b79d8/go.mod h1:XxO4hdhhrzAd+G4CjDqaOkd0hUzmtPR/d3EiBBMn/wc=
 github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec/go.mod h1:owBmyHYMLkxyrugmfwE/DLJyW8Ro9mkphwuVErQ0iUw=

+ 3 - 1
vendor/github.com/fatedier/kcp-go/.travis.yml

@@ -1,6 +1,8 @@
 language: go
 go:
-    - 1.9
+    - 1.9.x
+    - 1.10.x
+    - 1.11.x
 
 before_install:
     - go get -t -v ./...

+ 55 - 43
vendor/github.com/fatedier/kcp-go/README.md

@@ -20,24 +20,20 @@
 
 **kcp-go** is a **Production-Grade Reliable-UDP** library for [golang](https://golang.org/). 
 
-It provides **fast, ordered and error-checked** delivery of streams over **UDP** packets, has been well tested with opensource project [kcptun](https://github.com/xtaci/kcptun). Millions of devices(from low-end MIPS routers to high-end servers) are running with **kcp-go** at present, including applications like **online games, live broadcasting, file synchronization and network acceleration**.
+This library intents to provide a **smooth, resilient, ordered, error-checked and anonymous** delivery of streams over **UDP** packets, it has been battle-tested with opensource project [kcptun](https://github.com/xtaci/kcptun). Millions of devices(from low-end MIPS routers to high-end servers) have deployed **kcp-go** powered program in a variety of forms like **online games, live broadcasting, file synchronization and network acceleration**.
 
 [Lastest Release](https://github.com/xtaci/kcp-go/releases)
 
 ## Features
 
-1. Optimized for **Realtime Online Games, Audio/Video Streaming and Latency-Sensitive Distributed Consensus**.
-1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with language specific optimizations.
+1. Designed for **Latency-sensitive** scenarios.
 1. **Cache friendly** and **Memory optimized** design, offers extremely **High Performance** core.
 1. Handles **>5K concurrent connections** on a single commodity server.
 1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener), a drop-in replacement for [net.TCPConn](https://golang.org/pkg/net/#TCPConn).
 1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction)
-1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode.
-1. **Fixed number of goroutines** created for the entire server application, minimized goroutine context switch.
-
-## Conventions
-
-Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
+1. Packet level encryption support with [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard), [TEA](https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm), [3DES](https://en.wikipedia.org/wiki/Triple_DES), [Blowfish](https://en.wikipedia.org/wiki/Blowfish_(cipher)), [Cast5](https://en.wikipedia.org/wiki/CAST-128), [Salsa20]( https://en.wikipedia.org/wiki/Salsa20), etc. in [CFB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Cipher_Feedback_.28CFB.29) mode, which generates completely anonymous packet.
+1. Only **A fixed number of goroutines** will be created for the entire server application, costs in **context switch** between goroutines have been taken into consideration.
+1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with various improvements.
 
 ## Documentation
 
@@ -80,47 +76,59 @@ Server:   [full demo](https://github.com/xtaci/kcptun/blob/master/server/main.go
 lis, err := kcp.ListenWithOptions(":10000", nil, 10, 3)
 ```
 
-## Performance
+## Benchmark
 ```
   Model Name:	MacBook Pro
-  Model Identifier:	MacBookPro12,1
+  Model Identifier:	MacBookPro14,1
   Processor Name:	Intel Core i5
-  Processor Speed:	2.7 GHz
+  Processor Speed:	3.1 GHz
   Number of Processors:	1
   Total Number of Cores:	2
   L2 Cache (per Core):	256 KB
-  L3 Cache:	3 MB
+  L3 Cache:	4 MB
   Memory:	8 GB
 ```
 ```
 $ go test -v -run=^$ -bench .
 beginning tests, encryption:salsa20, fec:10/3
-BenchmarkAES128-4          	  200000	      8256 ns/op	 363.33 MB/s	       0 B/op	       0 allocs/op
-BenchmarkAES192-4          	  200000	      9153 ns/op	 327.74 MB/s	       0 B/op	       0 allocs/op
-BenchmarkAES256-4          	  200000	     10079 ns/op	 297.64 MB/s	       0 B/op	       0 allocs/op
-BenchmarkTEA-4             	  100000	     18643 ns/op	 160.91 MB/s	       0 B/op	       0 allocs/op
-BenchmarkXOR-4             	 5000000	       316 ns/op	9486.46 MB/s	       0 B/op	       0 allocs/op
-BenchmarkBlowfish-4        	   50000	     35643 ns/op	  84.17 MB/s	       0 B/op	       0 allocs/op
-BenchmarkNone-4            	30000000	        56.2 ns/op	53371.83 MB/s	       0 B/op	       0 allocs/op
-BenchmarkCast5-4           	   30000	     44744 ns/op	  67.05 MB/s	       0 B/op	       0 allocs/op
-Benchmark3DES-4            	    2000	    639839 ns/op	   4.69 MB/s	       2 B/op	       0 allocs/op
-BenchmarkTwofish-4         	   30000	     43368 ns/op	  69.17 MB/s	       0 B/op	       0 allocs/op
-BenchmarkXTEA-4            	   30000	     57673 ns/op	  52.02 MB/s	       0 B/op	       0 allocs/op
-BenchmarkSalsa20-4         	  300000	      3917 ns/op	 765.80 MB/s	       0 B/op	       0 allocs/op
-BenchmarkFlush-4           	10000000	       226 ns/op	       0 B/op	       0 allocs/op
-BenchmarkEchoSpeed4K-4     	    5000	    300030 ns/op	  13.65 MB/s	    5672 B/op	     177 allocs/op
-BenchmarkEchoSpeed64K-4    	     500	   3202335 ns/op	  20.47 MB/s	   73295 B/op	    2198 allocs/op
-BenchmarkEchoSpeed512K-4   	      50	  24926924 ns/op	  21.03 MB/s	  659339 B/op	   17602 allocs/op
-BenchmarkEchoSpeed1M-4     	      20	  64857821 ns/op	  16.17 MB/s	 1772437 B/op	   42869 allocs/op
-BenchmarkSinkSpeed4K-4     	   30000	     50230 ns/op	  81.54 MB/s	    2058 B/op	      48 allocs/op
-BenchmarkSinkSpeed64K-4    	    2000	    648718 ns/op	 101.02 MB/s	   31165 B/op	     687 allocs/op
-BenchmarkSinkSpeed256K-4   	     300	   4635905 ns/op	 113.09 MB/s	  286229 B/op	    5516 allocs/op
-BenchmarkSinkSpeed1M-4     	     200	   9566933 ns/op	 109.60 MB/s	  463771 B/op	   10701 allocs/op
+goos: darwin
+goarch: amd64
+pkg: github.com/xtaci/kcp-go
+BenchmarkSM4-4                 	   50000	     32180 ns/op	  93.23 MB/s	       0 B/op	       0 allocs/op
+BenchmarkAES128-4              	  500000	      3285 ns/op	 913.21 MB/s	       0 B/op	       0 allocs/op
+BenchmarkAES192-4              	  300000	      3623 ns/op	 827.85 MB/s	       0 B/op	       0 allocs/op
+BenchmarkAES256-4              	  300000	      3874 ns/op	 774.20 MB/s	       0 B/op	       0 allocs/op
+BenchmarkTEA-4                 	  100000	     15384 ns/op	 195.00 MB/s	       0 B/op	       0 allocs/op
+BenchmarkXOR-4                 	20000000	        89.9 ns/op	33372.00 MB/s	       0 B/op	       0 allocs/op
+BenchmarkBlowfish-4            	   50000	     26927 ns/op	 111.41 MB/s	       0 B/op	       0 allocs/op
+BenchmarkNone-4                	30000000	        45.7 ns/op	65597.94 MB/s	       0 B/op	       0 allocs/op
+BenchmarkCast5-4               	   50000	     34258 ns/op	  87.57 MB/s	       0 B/op	       0 allocs/op
+Benchmark3DES-4                	   10000	    117149 ns/op	  25.61 MB/s	       0 B/op	       0 allocs/op
+BenchmarkTwofish-4             	   50000	     33538 ns/op	  89.45 MB/s	       0 B/op	       0 allocs/op
+BenchmarkXTEA-4                	   30000	     45666 ns/op	  65.69 MB/s	       0 B/op	       0 allocs/op
+BenchmarkSalsa20-4             	  500000	      3308 ns/op	 906.76 MB/s	       0 B/op	       0 allocs/op
+BenchmarkCRC32-4               	20000000	        65.2 ns/op	15712.43 MB/s
+BenchmarkCsprngSystem-4        	 1000000	      1150 ns/op	  13.91 MB/s
+BenchmarkCsprngMD5-4           	10000000	       145 ns/op	 110.26 MB/s
+BenchmarkCsprngSHA1-4          	10000000	       158 ns/op	 126.54 MB/s
+BenchmarkCsprngNonceMD5-4      	10000000	       153 ns/op	 104.22 MB/s
+BenchmarkCsprngNonceAES128-4   	100000000	        19.1 ns/op	 837.81 MB/s
+BenchmarkFECDecode-4           	 1000000	      1119 ns/op	1339.61 MB/s	    1606 B/op	       2 allocs/op
+BenchmarkFECEncode-4           	 2000000	       832 ns/op	1801.83 MB/s	      17 B/op	       0 allocs/op
+BenchmarkFlush-4               	 5000000	       272 ns/op	       0 B/op	       0 allocs/op
+BenchmarkEchoSpeed4K-4         	    5000	    259617 ns/op	  15.78 MB/s	    5451 B/op	     149 allocs/op
+BenchmarkEchoSpeed64K-4        	    1000	   1706084 ns/op	  38.41 MB/s	   56002 B/op	    1604 allocs/op
+BenchmarkEchoSpeed512K-4       	     100	  14345505 ns/op	  36.55 MB/s	  482597 B/op	   13045 allocs/op
+BenchmarkEchoSpeed1M-4         	      30	  34859104 ns/op	  30.08 MB/s	 1143773 B/op	   27186 allocs/op
+BenchmarkSinkSpeed4K-4         	   50000	     31369 ns/op	 130.57 MB/s	    1566 B/op	      30 allocs/op
+BenchmarkSinkSpeed64K-4        	    5000	    329065 ns/op	 199.16 MB/s	   21529 B/op	     453 allocs/op
+BenchmarkSinkSpeed256K-4       	     500	   2373354 ns/op	 220.91 MB/s	  166332 B/op	    3554 allocs/op
+BenchmarkSinkSpeed1M-4         	     300	   5117927 ns/op	 204.88 MB/s	  310378 B/op	    6988 allocs/op
 PASS
-ok  	_/Users/xtaci/.godeps/src/github.com/xtaci/kcp-go	39.689s
+ok  	github.com/xtaci/kcp-go	50.349s
 ```
 
-## Design Considerations
+## Key Design Considerations
 
 1. slice vs. container/list
 
@@ -139,7 +147,9 @@ List structure introduces **heavy cache misses** compared to slice which owns be
 
 2. Timing accuracy vs. syscall clock_gettime
 
-Timing is **critical** to **RTT estimator**, inaccurate timing introduces false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz), the benchmark for time.Now():
+Timing is **critical** to **RTT estimator**, inaccurate timing leads to false retransmissions in KCP, but calling `time.Now()` costs 42 cycles(10.5ns on 4GHz CPU, 15.6ns on my MacBook Pro 2.7GHz). 
+
+The benchmark for time.Now() lies here:
 
 https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
 
@@ -147,14 +157,17 @@ https://github.com/xtaci/notes/blob/master/golang/benchmark2/syscall_test.go
 BenchmarkNow-4         	100000000	        15.6 ns/op
 ```
 
-In kcp-go, after each `kcp.output()` function call, current time will be updated upon return, and each `kcp.flush()` will get current time once. For most of the time, 5000 connections costs 5000 * 15.6ns = 78us(no packet needs to be sent by `kcp.output()`), as for 10MB/s data transfering with 1400 MTU, `kcp.output()` will be called around 7500 times and costs 117us for `time.Now()` in **every second**.
+In kcp-go, after each `kcp.output()` function call, current clock time will be updated upon return, and for a single `kcp.flush()` operation, current time will be queried from system once. For most of the time, 5000 connections costs 5000 * 15.6ns = 78us(a fixed cost while no packet needs to be sent), as for 10MB/s data transfering with 1400 MTU, `kcp.output()` will be called around 7500 times and costs 117us for `time.Now()` in **every second**.
+
+## Connection Termination
 
+Control messages like **SYN/FIN/RST** in TCP **are not defined** in KCP, you need some **keepalive/heartbeat mechanism** in the application-level. A real world example is to use some **multiplexing** protocol over session, such as [smux](https://github.com/xtaci/smux)(with embedded keepalive mechanism), see [kcptun](https://github.com/xtaci/kcptun) for example.
 
-## Tuning
+## FAQ
 
-Q: I'm handling >5K connections on my server. the CPU utilization is high.
+Q: I'm handling >5K connections on my server, the CPU utilization is so high.
 
-A: A standalone `agent` or `gate` server for kcp-go is suggested, not only for CPU utilization, but also important to the **precision** of RTT measurements which indirectly affects retransmission. By increasing update `interval` with `SetNoDelay` like `conn.SetNoDelay(1, 40, 1, 1)` will dramatically reduce system load.
+A: A standalone `agent` or `gate` server for running kcp-go is suggested, not only for CPU utilization, but also important to the **precision** of RTT measurements(timing) which indirectly affects retransmission. By increasing update `interval` with `SetNoDelay` like `conn.SetNoDelay(1, 40, 1, 1)` will dramatically reduce system load, but lower the performance.
 
 ## Who is using this?
 
@@ -163,10 +176,9 @@ A: A standalone `agent` or `gate` server for kcp-go is suggested, not only for C
 3. https://github.com/smallnest/rpcx -- A RPC service framework based on net/rpc like alibaba Dubbo and weibo Motan.
 4. https://github.com/gonet2/agent -- A gateway for games with stream multiplexing.
 5. https://github.com/syncthing/syncthing -- Open Source Continuous File Synchronization.
-6. https://play.google.com/store/apps/details?id=com.k17game.k3 -- Battle Zone - Earth 2048, a world-wide strategy game.
 
 ## Links
 
 1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++
 2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol
-3. https://github.com/templexxx/reedsolomon -- Reed-Solomon Erasure Coding in Go
+3. https://github.com/klauspost/reedsolomon -- Reed-Solomon Erasure Coding in Go
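
The "Timing accuracy vs. syscall clock_gettime" note in the README diff above derives its 78us/117us-per-second figures from a measured cost of roughly 15.6 ns per `time.Now()` call. The linked syscall_test.go in xtaci/notes is the authoritative benchmark; a minimal sketch of an equivalent micro-benchmark (the `sink` variable is only there to keep the compiler from eliding the call) looks like this:

```go
package bench

import (
	"testing"
	"time"
)

var sink time.Time // package-level sink prevents the call from being optimized away

// BenchmarkNow measures the per-call cost of time.Now(); multiplying the
// reported ns/op by the number of flush/output calls per second gives the
// 78us and 117us estimates quoted in the README.
func BenchmarkNow(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = time.Now()
	}
}
```

Run it with `go test -run='^$' -bench=BenchmarkNow` inside any package containing this file.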

+ 549 - 52
vendor/github.com/fatedier/kcp-go/crypt.go

@@ -57,8 +57,8 @@ func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) {
 }
 
 type sm4BlockCrypt struct {
-	encbuf []byte
-	decbuf []byte
+	encbuf [sm4.BlockSize]byte
+	decbuf [2 * sm4.BlockSize]byte
 	block  cipher.Block
 }
 
@@ -70,17 +70,15 @@ func NewSM4BlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, sm4.BlockSize)
-	c.decbuf = make([]byte, 2*sm4.BlockSize)
 	return c, nil
 }
 
-func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
-func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *sm4BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
+func (c *sm4BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 
 type twofishBlockCrypt struct {
-	encbuf []byte
-	decbuf []byte
+	encbuf [twofish.BlockSize]byte
+	decbuf [2 * twofish.BlockSize]byte
 	block  cipher.Block
 }
 
@@ -92,17 +90,15 @@ func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, twofish.BlockSize)
-	c.decbuf = make([]byte, 2*twofish.BlockSize)
 	return c, nil
 }
 
-func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
-func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
+func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 
 type tripleDESBlockCrypt struct {
-	encbuf []byte
-	decbuf []byte
+	encbuf [des.BlockSize]byte
+	decbuf [2 * des.BlockSize]byte
 	block  cipher.Block
 }
 
@@ -114,17 +110,15 @@ func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, des.BlockSize)
-	c.decbuf = make([]byte, 2*des.BlockSize)
 	return c, nil
 }
 
-func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
-func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
+func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 
 type cast5BlockCrypt struct {
-	encbuf []byte
-	decbuf []byte
+	encbuf [cast5.BlockSize]byte
+	decbuf [2 * cast5.BlockSize]byte
 	block  cipher.Block
 }
 
@@ -136,17 +130,15 @@ func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, cast5.BlockSize)
-	c.decbuf = make([]byte, 2*cast5.BlockSize)
 	return c, nil
 }
 
-func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
-func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
+func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 
 type blowfishBlockCrypt struct {
-	encbuf []byte
-	decbuf []byte
+	encbuf [blowfish.BlockSize]byte
+	decbuf [2 * blowfish.BlockSize]byte
 	block  cipher.Block
 }
 
@@ -158,17 +150,15 @@ func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, blowfish.BlockSize)
-	c.decbuf = make([]byte, 2*blowfish.BlockSize)
 	return c, nil
 }
 
-func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
-func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
+func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 
 type aesBlockCrypt struct {
-	encbuf []byte
-	decbuf []byte
+	encbuf [aes.BlockSize]byte
+	decbuf [2 * aes.BlockSize]byte
 	block  cipher.Block
 }
 
@@ -180,17 +170,15 @@ func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, aes.BlockSize)
-	c.decbuf = make([]byte, 2*aes.BlockSize)
 	return c, nil
 }
 
-func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
-func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
+func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 
 type teaBlockCrypt struct {
-	encbuf []byte
-	decbuf []byte
+	encbuf [tea.BlockSize]byte
+	decbuf [2 * tea.BlockSize]byte
 	block  cipher.Block
 }
 
@@ -202,17 +190,15 @@ func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, tea.BlockSize)
-	c.decbuf = make([]byte, 2*tea.BlockSize)
 	return c, nil
 }
 
-func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
-func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
+func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 
 type xteaBlockCrypt struct {
-	encbuf []byte
-	decbuf []byte
+	encbuf [xtea.BlockSize]byte
+	decbuf [2 * xtea.BlockSize]byte
 	block  cipher.Block
 }
 
@@ -224,13 +210,11 @@ func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
 		return nil, err
 	}
 	c.block = block
-	c.encbuf = make([]byte, xtea.BlockSize)
-	c.decbuf = make([]byte, 2*xtea.BlockSize)
 	return c, nil
 }
 
-func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
-func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
+func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf[:]) }
+func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf[:]) }
 
 type simpleXORBlockCrypt struct {
 	xortbl []byte
@@ -258,31 +242,544 @@ func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) }
 
 // packet encryption with local CFB mode
 func encrypt(block cipher.Block, dst, src, buf []byte) {
+	switch block.BlockSize() {
+	case 8:
+		encrypt8(block, dst, src, buf)
+	case 16:
+		encrypt16(block, dst, src, buf)
+	default:
+		encryptVariant(block, dst, src, buf)
+	}
+}
+
+// optimized encryption for the ciphers which works in 8-bytes
+func encrypt8(block cipher.Block, dst, src, buf []byte) {
+	tbl := buf[:8]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / 8
+	base := 0
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		s := src[base:][0:64]
+		d := dst[base:][0:64]
+		// 1
+		xor.BytesSrc1(d[0:8], s[0:8], tbl)
+		block.Encrypt(tbl, d[0:8])
+		// 2
+		xor.BytesSrc1(d[8:16], s[8:16], tbl)
+		block.Encrypt(tbl, d[8:16])
+		// 3
+		xor.BytesSrc1(d[16:24], s[16:24], tbl)
+		block.Encrypt(tbl, d[16:24])
+		// 4
+		xor.BytesSrc1(d[24:32], s[24:32], tbl)
+		block.Encrypt(tbl, d[24:32])
+		// 5
+		xor.BytesSrc1(d[32:40], s[32:40], tbl)
+		block.Encrypt(tbl, d[32:40])
+		// 6
+		xor.BytesSrc1(d[40:48], s[40:48], tbl)
+		block.Encrypt(tbl, d[40:48])
+		// 7
+		xor.BytesSrc1(d[48:56], s[48:56], tbl)
+		block.Encrypt(tbl, d[48:56])
+		// 8
+		xor.BytesSrc1(d[56:64], s[56:64], tbl)
+		block.Encrypt(tbl, d[56:64])
+		base += 64
+	}
+
+	switch left {
+	case 7:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 6:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 5:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 4:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 3:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 2:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 1:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 8
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
+}
+
+// optimized encryption for the ciphers which works in 16-bytes
+func encrypt16(block cipher.Block, dst, src, buf []byte) {
+	tbl := buf[:16]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / 16
+	base := 0
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		s := src[base:][0:128]
+		d := dst[base:][0:128]
+		// 1
+		xor.BytesSrc1(d[0:16], s[0:16], tbl)
+		block.Encrypt(tbl, d[0:16])
+		// 2
+		xor.BytesSrc1(d[16:32], s[16:32], tbl)
+		block.Encrypt(tbl, d[16:32])
+		// 3
+		xor.BytesSrc1(d[32:48], s[32:48], tbl)
+		block.Encrypt(tbl, d[32:48])
+		// 4
+		xor.BytesSrc1(d[48:64], s[48:64], tbl)
+		block.Encrypt(tbl, d[48:64])
+		// 5
+		xor.BytesSrc1(d[64:80], s[64:80], tbl)
+		block.Encrypt(tbl, d[64:80])
+		// 6
+		xor.BytesSrc1(d[80:96], s[80:96], tbl)
+		block.Encrypt(tbl, d[80:96])
+		// 7
+		xor.BytesSrc1(d[96:112], s[96:112], tbl)
+		block.Encrypt(tbl, d[96:112])
+		// 8
+		xor.BytesSrc1(d[112:128], s[112:128], tbl)
+		block.Encrypt(tbl, d[112:128])
+		base += 128
+	}
+
+	switch left {
+	case 7:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 6:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 5:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 4:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 3:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 2:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 1:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += 16
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
+}
+
+func encryptVariant(block cipher.Block, dst, src, buf []byte) {
 	blocksize := block.BlockSize()
 	tbl := buf[:blocksize]
 	block.Encrypt(tbl, initialVector)
 	n := len(src) / blocksize
 	base := 0
-	for i := 0; i < n; i++ {
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		// 1
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+
+		// 2
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+
+		// 3
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+
+		// 4
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+
+		// 5
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+
+		// 6
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+
+		// 7
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+
+		// 8
 		xor.BytesSrc1(dst[base:], src[base:], tbl)
 		block.Encrypt(tbl, dst[base:])
 		base += blocksize
 	}
-	xor.BytesSrc0(dst[base:], src[base:], tbl)
+
+	switch left {
+	case 7:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 6:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 5:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 4:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 3:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 2:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 1:
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		block.Encrypt(tbl, dst[base:])
+		base += blocksize
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
 }
 
+// decryption
 func decrypt(block cipher.Block, dst, src, buf []byte) {
+	switch block.BlockSize() {
+	case 8:
+		decrypt8(block, dst, src, buf)
+	case 16:
+		decrypt16(block, dst, src, buf)
+	default:
+		decryptVariant(block, dst, src, buf)
+	}
+}
+
+func decrypt8(block cipher.Block, dst, src, buf []byte) {
+	tbl := buf[0:8]
+	next := buf[8:16]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / 8
+	base := 0
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		s := src[base:][0:64]
+		d := dst[base:][0:64]
+		// 1
+		block.Encrypt(next, s[0:8])
+		xor.BytesSrc1(d[0:8], s[0:8], tbl)
+		// 2
+		block.Encrypt(tbl, s[8:16])
+		xor.BytesSrc1(d[8:16], s[8:16], next)
+		// 3
+		block.Encrypt(next, s[16:24])
+		xor.BytesSrc1(d[16:24], s[16:24], tbl)
+		// 4
+		block.Encrypt(tbl, s[24:32])
+		xor.BytesSrc1(d[24:32], s[24:32], next)
+		// 5
+		block.Encrypt(next, s[32:40])
+		xor.BytesSrc1(d[32:40], s[32:40], tbl)
+		// 6
+		block.Encrypt(tbl, s[40:48])
+		xor.BytesSrc1(d[40:48], s[40:48], next)
+		// 7
+		block.Encrypt(next, s[48:56])
+		xor.BytesSrc1(d[48:56], s[48:56], tbl)
+		// 8
+		block.Encrypt(tbl, s[56:64])
+		xor.BytesSrc1(d[56:64], s[56:64], next)
+		base += 64
+	}
+
+	switch left {
+	case 7:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 6:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 5:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 4:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 3:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 2:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 1:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 8
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
+}
+
+func decrypt16(block cipher.Block, dst, src, buf []byte) {
+	tbl := buf[0:16]
+	next := buf[16:32]
+	block.Encrypt(tbl, initialVector)
+	n := len(src) / 16
+	base := 0
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		s := src[base:][0:128]
+		d := dst[base:][0:128]
+		// 1
+		block.Encrypt(next, s[0:16])
+		xor.BytesSrc1(d[0:16], s[0:16], tbl)
+		// 2
+		block.Encrypt(tbl, s[16:32])
+		xor.BytesSrc1(d[16:32], s[16:32], next)
+		// 3
+		block.Encrypt(next, s[32:48])
+		xor.BytesSrc1(d[32:48], s[32:48], tbl)
+		// 4
+		block.Encrypt(tbl, s[48:64])
+		xor.BytesSrc1(d[48:64], s[48:64], next)
+		// 5
+		block.Encrypt(next, s[64:80])
+		xor.BytesSrc1(d[64:80], s[64:80], tbl)
+		// 6
+		block.Encrypt(tbl, s[80:96])
+		xor.BytesSrc1(d[80:96], s[80:96], next)
+		// 7
+		block.Encrypt(next, s[96:112])
+		xor.BytesSrc1(d[96:112], s[96:112], tbl)
+		// 8
+		block.Encrypt(tbl, s[112:128])
+		xor.BytesSrc1(d[112:128], s[112:128], next)
+		base += 128
+	}
+
+	switch left {
+	case 7:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 6:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 5:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 4:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 3:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 2:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 1:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += 16
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
+	}
+}
+
+func decryptVariant(block cipher.Block, dst, src, buf []byte) {
 	blocksize := block.BlockSize()
 	tbl := buf[:blocksize]
 	next := buf[blocksize:]
 	block.Encrypt(tbl, initialVector)
 	n := len(src) / blocksize
 	base := 0
-	for i := 0; i < n; i++ {
+	repeat := n / 8
+	left := n % 8
+	for i := 0; i < repeat; i++ {
+		// 1
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		base += blocksize
+
+		// 2
+		block.Encrypt(tbl, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], next)
+		base += blocksize
+
+		// 3
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		base += blocksize
+
+		// 4
+		block.Encrypt(tbl, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], next)
+		base += blocksize
+
+		// 5
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		base += blocksize
+
+		// 6
+		block.Encrypt(tbl, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], next)
+		base += blocksize
+
+		// 7
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		base += blocksize
+
+		// 8
+		block.Encrypt(tbl, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], next)
+		base += blocksize
+	}
+
+	switch left {
+	case 7:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 6:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 5:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 4:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 3:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 2:
+		block.Encrypt(next, src[base:])
+		xor.BytesSrc1(dst[base:], src[base:], tbl)
+		tbl, next = next, tbl
+		base += blocksize
+		fallthrough
+	case 1:
 		block.Encrypt(next, src[base:])
 		xor.BytesSrc1(dst[base:], src[base:], tbl)
 		tbl, next = next, tbl
 		base += blocksize
+		fallthrough
+	case 0:
+		xor.BytesSrc0(dst[base:], src[base:], tbl)
 	}
-	xor.BytesSrc0(dst[base:], src[base:], tbl)
 }
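
The crypt.go changes above replace the single generic CFB-style loop with block-size-specialized, 8-way unrolled variants (encrypt8/encrypt16/encryptVariant and their decrypt counterparts) and turn each cipher's encbuf/decbuf scratch space from heap-allocated slices into fixed-size arrays embedded in the struct. For reference, the un-unrolled loop they all expand is essentially the removed code, shown here assuming the package's `initialVector` variable and the `crypto/cipher` and `github.com/templexxx/xor` imports that crypt.go already uses:

```go
// Reference form of the "local CFB" encryption: XOR each plaintext block with
// the current keystream block (tbl), then encrypt the resulting ciphertext
// block to obtain the next keystream block; the tail shorter than one block is
// only XORed.
func encryptRef(block cipher.Block, dst, src, buf []byte) {
	blocksize := block.BlockSize()
	tbl := buf[:blocksize]
	block.Encrypt(tbl, initialVector) // initialVector is the package-level IV in crypt.go
	n := len(src) / blocksize
	base := 0
	for i := 0; i < n; i++ {
		xor.BytesSrc1(dst[base:], src[base:], tbl) // whole-block XOR: dst = src ^ tbl
		block.Encrypt(tbl, dst[base:])             // chain the keystream on the ciphertext
		base += blocksize
	}
	xor.BytesSrc0(dst[base:], src[base:], tbl) // remaining tail bytes
}
```

The unrolled versions process eight blocks per iteration and handle the remainder with a `fallthrough` switch; this is purely a throughput optimization, and the byte stream they produce is unchanged.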

+ 52 - 0
vendor/github.com/fatedier/kcp-go/entropy.go

@@ -0,0 +1,52 @@
+package kcp
+
+import (
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/md5"
+	"crypto/rand"
+	"io"
+)
+
+// Entropy defines a entropy source
+type Entropy interface {
+	Init()
+	Fill(nonce []byte)
+}
+
+// nonceMD5 nonce generator for packet header
+type nonceMD5 struct {
+	seed [md5.Size]byte
+}
+
+func (n *nonceMD5) Init() { /*nothing required*/ }
+
+func (n *nonceMD5) Fill(nonce []byte) {
+	if n.seed[0] == 0 { // entropy update
+		io.ReadFull(rand.Reader, n.seed[:])
+	}
+	n.seed = md5.Sum(n.seed[:])
+	copy(nonce, n.seed[:])
+}
+
+// nonceAES128 nonce generator for packet headers
+type nonceAES128 struct {
+	seed  [aes.BlockSize]byte
+	block cipher.Block
+}
+
+func (n *nonceAES128) Init() {
+	var key [16]byte //aes-128
+	io.ReadFull(rand.Reader, key[:])
+	io.ReadFull(rand.Reader, n.seed[:])
+	block, _ := aes.NewCipher(key[:])
+	n.block = block
+}
+
+func (n *nonceAES128) Fill(nonce []byte) {
+	if n.seed[0] == 0 { // entropy update
+		io.ReadFull(rand.Reader, n.seed[:])
+	}
+	n.block.Encrypt(n.seed[:], n.seed[:])
+	copy(nonce, n.seed[:])
+}
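
entropy.go is a new file providing pluggable nonce sources for packet headers: nonceMD5 chains MD5 over a random seed, while nonceAES128 encrypts the seed in place with a random AES-128 key. A hypothetical usage sketch inside the kcp package (the 16-byte nonce length is only illustrative, chosen to match the AES block size):

```go
// exampleNonce draws one fresh header nonce from an AES-128 entropy source.
func exampleNonce() []byte {
	var src Entropy = new(nonceAES128)
	src.Init() // picks a random key and seed

	nonce := make([]byte, 16)
	src.Fill(nonce) // advances the internal state and copies it out
	return nonce
}
```

A real call site would initialise the source once and reuse it for every outgoing packet rather than re-creating it per call.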

+ 49 - 41
vendor/github.com/fatedier/kcp-go/fec.go

@@ -4,7 +4,7 @@ import (
 	"encoding/binary"
 	"sync/atomic"
 
-	"github.com/templexxx/reedsolomon"
+	"github.com/klauspost/reedsolomon"
 )
 
 const (
@@ -34,6 +34,9 @@ type (
 		decodeCache [][]byte
 		flagCache   []bool
 
+		// zeros
+		zeros []byte
+
 		// RS decoder
 		codec reedsolomon.Encoder
 	}
@@ -47,19 +50,20 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
 		return nil
 	}
 
-	fec := new(fecDecoder)
-	fec.rxlimit = rxlimit
-	fec.dataShards = dataShards
-	fec.parityShards = parityShards
-	fec.shardSize = dataShards + parityShards
-	enc, err := reedsolomon.New(dataShards, parityShards)
+	dec := new(fecDecoder)
+	dec.rxlimit = rxlimit
+	dec.dataShards = dataShards
+	dec.parityShards = parityShards
+	dec.shardSize = dataShards + parityShards
+	codec, err := reedsolomon.New(dataShards, parityShards)
 	if err != nil {
 		return nil
 	}
-	fec.codec = enc
-	fec.decodeCache = make([][]byte, fec.shardSize)
-	fec.flagCache = make([]bool, fec.shardSize)
-	return fec
+	dec.codec = codec
+	dec.decodeCache = make([][]byte, dec.shardSize)
+	dec.flagCache = make([]bool, dec.shardSize)
+	dec.zeros = make([]byte, mtuLimit)
+	return dec
 }
 
 // decodeBytes a fec packet
@@ -116,7 +120,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
 	if searchEnd-searchBegin+1 >= dec.dataShards {
 		var numshard, numDataShard, first, maxlen int
 
-		// zero cache
+		// zero caches
 		shards := dec.decodeCache
 		shardsflag := dec.flagCache
 		for k := range dec.decodeCache {
@@ -146,15 +150,15 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
 		}
 
 		if numDataShard == dec.dataShards {
-			// case 1:  no lost data shards
+			// case 1: no loss on data shards
 			dec.rx = dec.freeRange(first, numshard, dec.rx)
 		} else if numshard >= dec.dataShards {
-			// case 2: data shard lost, but  recoverable from parity shard
+			// case 2: loss on data shards, but it's recoverable from parity shards
 			for k := range shards {
 				if shards[k] != nil {
 					dlen := len(shards[k])
 					shards[k] = shards[k][:maxlen]
-					xorBytes(shards[k][dlen:], shards[k][dlen:], shards[k][dlen:])
+					copy(shards[k][dlen:], dec.zeros)
 				}
 			}
 			if err := dec.codec.ReconstructData(shards); err == nil {
@@ -170,7 +174,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
 
 	// keep rxlimit
 	if len(dec.rx) > dec.rxlimit {
-		if dec.rx[0].flag == typeData { // record unrecoverable data
+		if dec.rx[0].flag == typeData { // track the unrecoverable data
 			atomic.AddUint64(&DefaultSnmp.FECShortShards, 1)
 		}
 		dec.rx = dec.freeRange(0, 1, dec.rx)
@@ -180,7 +184,7 @@ func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
 
 // free a range of fecPacket, and zero for GC recycling
 func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
-	for i := first; i < first+n; i++ { // free
+	for i := first; i < first+n; i++ { // recycle buffer
 		xmitBuf.Put(q[i].data)
 	}
 	copy(q[first:], q[first+n:])
@@ -200,7 +204,7 @@ type (
 		next         uint32 // next seqid
 
 		shardCount int // count the number of datashards collected
-		maxSize    int // record maximum data length in datashard
+		maxSize    int // track maximum data length in datashard
 
 		headerOffset  int // FEC header offset
 		payloadOffset int // FEC payload offset
@@ -209,6 +213,9 @@ type (
 		shardCache  [][]byte
 		encodeCache [][]byte
 
+		// zeros
+		zeros []byte
+
 		// RS encoder
 		codec reedsolomon.Encoder
 	}
@@ -218,31 +225,32 @@ func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
 	if dataShards <= 0 || parityShards <= 0 {
 		return nil
 	}
-	fec := new(fecEncoder)
-	fec.dataShards = dataShards
-	fec.parityShards = parityShards
-	fec.shardSize = dataShards + parityShards
-	fec.paws = (0xffffffff/uint32(fec.shardSize) - 1) * uint32(fec.shardSize)
-	fec.headerOffset = offset
-	fec.payloadOffset = fec.headerOffset + fecHeaderSize
-
-	enc, err := reedsolomon.New(dataShards, parityShards)
+	enc := new(fecEncoder)
+	enc.dataShards = dataShards
+	enc.parityShards = parityShards
+	enc.shardSize = dataShards + parityShards
+	enc.paws = (0xffffffff/uint32(enc.shardSize) - 1) * uint32(enc.shardSize)
+	enc.headerOffset = offset
+	enc.payloadOffset = enc.headerOffset + fecHeaderSize
+
+	codec, err := reedsolomon.New(dataShards, parityShards)
 	if err != nil {
 		return nil
 	}
-	fec.codec = enc
+	enc.codec = codec
 
 	// caches
-	fec.encodeCache = make([][]byte, fec.shardSize)
-	fec.shardCache = make([][]byte, fec.shardSize)
-	for k := range fec.shardCache {
-		fec.shardCache[k] = make([]byte, mtuLimit)
+	enc.encodeCache = make([][]byte, enc.shardSize)
+	enc.shardCache = make([][]byte, enc.shardSize)
+	for k := range enc.shardCache {
+		enc.shardCache[k] = make([]byte, mtuLimit)
 	}
-	return fec
+	enc.zeros = make([]byte, mtuLimit)
+	return enc
 }
 
-// encode the packet, output parity shards if we have enough datashards
-// the content of returned parityshards will change in next encode
+// encodes the packet, outputs parity shards if we have collected quorum datashards
+// notice: the contents of 'ps' will be re-written in successive calling
 func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
 	enc.markData(b[enc.headerOffset:])
 	binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
@@ -253,18 +261,18 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
 	copy(enc.shardCache[enc.shardCount], b)
 	enc.shardCount++
 
-	// record max datashard length
+	// track max datashard length
 	if sz > enc.maxSize {
 		enc.maxSize = sz
 	}
 
-	//  calculate Reed-Solomon Erasure Code
+	//  Generation of Reed-Solomon Erasure Code
 	if enc.shardCount == enc.dataShards {
-		// bzero each datashard's tail
+		// fill '0' into the tail of each datashard
 		for i := 0; i < enc.dataShards; i++ {
 			shard := enc.shardCache[i]
 			slen := len(shard)
-			xorBytes(shard[slen:enc.maxSize], shard[slen:enc.maxSize], shard[slen:enc.maxSize])
+			copy(shard[slen:enc.maxSize], enc.zeros)
 		}
 
 		// construct equal-sized slice with stripped header
@@ -273,7 +281,7 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
 			cache[k] = enc.shardCache[k][enc.payloadOffset:enc.maxSize]
 		}
 
-		// rs encode
+		// encoding
 		if err := enc.codec.Encode(cache); err == nil {
 			ps = enc.shardCache[enc.dataShards:]
 			for k := range ps {
@@ -282,7 +290,7 @@ func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
 			}
 		}
 
-		// reset counters to zero
+		// counters resetting
 		enc.shardCount = 0
 		enc.maxSize = 0
 	}
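
fec.go now builds its codec from klauspost/reedsolomon instead of templexxx/reedsolomon, and pads shard tails by copying from a preallocated `zeros` buffer rather than XOR-ing them with themselves. The calls it relies on are `reedsolomon.New`, `Encoder.Encode` (fill the parity shards) and `Encoder.ReconstructData` (rebuild only the missing data shards). A minimal standalone sketch of that API (the 10+3 shard split and 64-byte shard size are arbitrary values for illustration):

```go
package main

import (
	"fmt"

	"github.com/klauspost/reedsolomon"
)

func main() {
	const dataShards, parityShards = 10, 3
	enc, err := reedsolomon.New(dataShards, parityShards)
	if err != nil {
		panic(err)
	}

	// All shards must have equal length; the parity shards start out zeroed.
	shards := make([][]byte, dataShards+parityShards)
	for i := range shards {
		shards[i] = make([]byte, 64)
	}
	copy(shards[0], "hello")

	if err := enc.Encode(shards); err != nil { // compute the 3 parity shards
		panic(err)
	}

	shards[0] = nil // simulate a lost data shard
	if err := enc.ReconstructData(shards); err != nil { // rebuild data shards only
		panic(err)
	}
	fmt.Println(string(shards[0][:5])) // prints "hello"
}
```

This mirrors what fecEncoder.encode and fecDecoder.decode do per shard group, minus the header bookkeeping and buffer recycling.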

+ 98 - 84
vendor/github.com/fatedier/kcp-go/kcp.go

@@ -104,6 +104,7 @@ type segment struct {
 	xmit     uint32
 	resendts uint32
 	fastack  uint32
+	acked    uint32 // mark if the seg has acked
 	data     []byte
 }
 
@@ -181,8 +182,11 @@ func (kcp *KCP) newSegment(size int) (seg segment) {
 }
 
 // delSegment recycles a KCP segment
-func (kcp *KCP) delSegment(seg segment) {
-	xmitBuf.Put(seg.data)
+func (kcp *KCP) delSegment(seg *segment) {
+	if seg.data != nil {
+		xmitBuf.Put(seg.data)
+		seg.data = nil
+	}
 }
 
 // PeekSize checks the size of next message in the recv queue
@@ -238,7 +242,7 @@ func (kcp *KCP) Recv(buffer []byte) (n int) {
 		buffer = buffer[len(seg.data):]
 		n += len(seg.data)
 		count++
-		kcp.delSegment(*seg)
+		kcp.delSegment(seg)
 		if seg.frg == 0 {
 			break
 		}
@@ -382,10 +386,8 @@ func (kcp *KCP) parse_ack(sn uint32) {
 	for k := range kcp.snd_buf {
 		seg := &kcp.snd_buf[k]
 		if sn == seg.sn {
-			kcp.delSegment(*seg)
-			copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
-			kcp.snd_buf[len(kcp.snd_buf)-1] = segment{}
-			kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
+			seg.acked = 1
+			kcp.delSegment(seg)
 			break
 		}
 		if _itimediff(sn, seg.sn) < 0 {
@@ -394,7 +396,7 @@ func (kcp *KCP) parse_ack(sn uint32) {
 	}
 }
 
-func (kcp *KCP) parse_fastack(sn uint32) {
+func (kcp *KCP) parse_fastack(sn, ts uint32) {
 	if _itimediff(sn, kcp.snd_una) < 0 || _itimediff(sn, kcp.snd_nxt) >= 0 {
 		return
 	}
@@ -403,7 +405,7 @@ func (kcp *KCP) parse_fastack(sn uint32) {
 		seg := &kcp.snd_buf[k]
 		if _itimediff(sn, seg.sn) < 0 {
 			break
-		} else if sn != seg.sn {
+		} else if sn != seg.sn && _itimediff(seg.ts, ts) <= 0 {
 			seg.fastack++
 		}
 	}
@@ -414,7 +416,7 @@ func (kcp *KCP) parse_una(una uint32) {
 	for k := range kcp.snd_buf {
 		seg := &kcp.snd_buf[k]
 		if _itimediff(una, seg.sn) > 0 {
-			kcp.delSegment(*seg)
+			kcp.delSegment(seg)
 			count++
 		} else {
 			break
@@ -430,12 +432,12 @@ func (kcp *KCP) ack_push(sn, ts uint32) {
 	kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
 }
 
-func (kcp *KCP) parse_data(newseg segment) {
+// returns true if data has repeated
+func (kcp *KCP) parse_data(newseg segment) bool {
 	sn := newseg.sn
 	if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
 		_itimediff(sn, kcp.rcv_nxt) < 0 {
-		kcp.delSegment(newseg)
-		return
+		return true
 	}
 
 	n := len(kcp.rcv_buf) - 1
@@ -445,7 +447,6 @@ func (kcp *KCP) parse_data(newseg segment) {
 		seg := &kcp.rcv_buf[i]
 		if seg.sn == sn {
 			repeat = true
-			atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
 			break
 		}
 		if _itimediff(sn, seg.sn) > 0 {
@@ -455,6 +456,11 @@ func (kcp *KCP) parse_data(newseg segment) {
 	}
 
 	if !repeat {
+		// replicate the content if it's new
+		dataCopy := xmitBuf.Get().([]byte)[:len(newseg.data)]
+		copy(dataCopy, newseg.data)
+		newseg.data = dataCopy
+
 		if insert_idx == n+1 {
 			kcp.rcv_buf = append(kcp.rcv_buf, newseg)
 		} else {
@@ -462,8 +468,6 @@ func (kcp *KCP) parse_data(newseg segment) {
 			copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
 			kcp.rcv_buf[insert_idx] = newseg
 		}
-	} else {
-		kcp.delSegment(newseg)
 	}
 
 	// move available data from rcv_buf -> rcv_queue
@@ -481,18 +485,19 @@ func (kcp *KCP) parse_data(newseg segment) {
 		kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
 		kcp.rcv_buf = kcp.remove_front(kcp.rcv_buf, count)
 	}
+
+	return repeat
 }
 
 // Input when you received a low level packet (eg. UDP packet), call it
 // regular indicates a regular packet has received(not from FEC)
 func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
-	una := kcp.snd_una
+	snd_una := kcp.snd_una
 	if len(data) < IKCP_OVERHEAD {
 		return -1
 	}
 
-	var maxack uint32
-	var lastackts uint32
+	var latest uint32 // the latest ack packet
 	var flag int
 	var inSegs uint64
 
@@ -535,19 +540,15 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
 
 		if cmd == IKCP_CMD_ACK {
 			kcp.parse_ack(sn)
-			kcp.shrink_buf()
-			if flag == 0 {
-				flag = 1
-				maxack = sn
-			} else if _itimediff(sn, maxack) > 0 {
-				maxack = sn
-			}
-			lastackts = ts
+			kcp.parse_fastack(sn, ts)
+			flag |= 1
+			latest = ts
 		} else if cmd == IKCP_CMD_PUSH {
+			repeat := true
 			if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
 				kcp.ack_push(sn, ts)
 				if _itimediff(sn, kcp.rcv_nxt) >= 0 {
-					seg := kcp.newSegment(int(length))
+					var seg segment
 					seg.conv = conv
 					seg.cmd = cmd
 					seg.frg = frg
@@ -555,12 +556,11 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
 					seg.ts = ts
 					seg.sn = sn
 					seg.una = una
-					copy(seg.data, data[:length])
-					kcp.parse_data(seg)
-				} else {
-					atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
+					seg.data = data[:length] // delayed data copying
+					repeat = kcp.parse_data(seg)
 				}
-			} else {
+			}
+			if regular && repeat {
 				atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
 			}
 		} else if cmd == IKCP_CMD_WASK {
@@ -578,40 +578,42 @@ func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
 	}
 	atomic.AddUint64(&DefaultSnmp.InSegs, inSegs)
 
+	// update rtt with the latest ack timestamp,
+	// ignoring packets recovered by FEC
 	if flag != 0 && regular {
-		kcp.parse_fastack(maxack)
 		current := currentMs()
-		if _itimediff(current, lastackts) >= 0 {
-			kcp.update_ack(_itimediff(current, lastackts))
+		if _itimediff(current, latest) >= 0 {
+			kcp.update_ack(_itimediff(current, latest))
 		}
 	}
 
-	if _itimediff(kcp.snd_una, una) > 0 {
-		if kcp.cwnd < kcp.rmt_wnd {
-			mss := kcp.mss
-			if kcp.cwnd < kcp.ssthresh {
-				kcp.cwnd++
-				kcp.incr += mss
-			} else {
-				if kcp.incr < mss {
-					kcp.incr = mss
-				}
-				kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
-				if (kcp.cwnd+1)*mss <= kcp.incr {
+	// cwnd update when packet arrived
+	if kcp.nocwnd == 0 {
+		if _itimediff(kcp.snd_una, snd_una) > 0 {
+			if kcp.cwnd < kcp.rmt_wnd {
+				mss := kcp.mss
+				if kcp.cwnd < kcp.ssthresh {
 					kcp.cwnd++
+					kcp.incr += mss
+				} else {
+					if kcp.incr < mss {
+						kcp.incr = mss
+					}
+					kcp.incr += (mss*mss)/kcp.incr + (mss / 16)
+					if (kcp.cwnd+1)*mss <= kcp.incr {
+						kcp.cwnd++
+					}
+				}
+				if kcp.cwnd > kcp.rmt_wnd {
+					kcp.cwnd = kcp.rmt_wnd
+					kcp.incr = kcp.rmt_wnd * mss
 				}
-			}
-			if kcp.cwnd > kcp.rmt_wnd {
-				kcp.cwnd = kcp.rmt_wnd
-				kcp.incr = kcp.rmt_wnd * mss
 			}
 		}
 	}
 
 	if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately
 		kcp.flush(true)
-	} else if kcp.rmt_wnd == 0 && len(kcp.acklist) > 0 { // window zero
-		kcp.flush(true)
 	}
 	return 0
 }
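The congestion-window block above follows the classic slow-start/congestion-avoidance split: below `ssthresh` the window grows by one segment each time new data is acknowledged, while above it `incr` accumulates roughly one MSS per round trip. The standalone sketch below repeats the same arithmetic with hypothetical starting values so the growth pattern is easy to trace; it deliberately omits the remote-window cap shown above.

```go
package main

import "fmt"

func main() {
	// hypothetical starting values, chosen purely for illustration
	var cwnd, incr, ssthresh, mss uint32 = 2, 2 * 1400, 8, 1400

	for round := 0; round < 12; round++ {
		if cwnd < ssthresh { // slow start: +1 segment per acknowledged round
			cwnd++
			incr += mss
		} else { // congestion avoidance: roughly +1 segment per RTT
			if incr < mss {
				incr = mss
			}
			incr += (mss*mss)/incr + (mss / 16)
			if (cwnd+1)*mss <= incr {
				cwnd++
			}
		}
		fmt.Println("round", round, "cwnd", cwnd)
	}
}
```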
@@ -624,7 +626,7 @@ func (kcp *KCP) wnd_unused() uint16 {
 }
 
 // flush pending data
-func (kcp *KCP) flush(ackOnly bool) {
+func (kcp *KCP) flush(ackOnly bool) uint32 {
 	var seg segment
 	seg.conv = kcp.conv
 	seg.cmd = IKCP_CMD_ACK
@@ -653,7 +655,7 @@ func (kcp *KCP) flush(ackOnly bool) {
 		if size > 0 {
 			kcp.output(buffer, size)
 		}
-		return
+		return kcp.interval
 	}
 
 	// probe window size (if remote window size equals zero)
@@ -723,7 +725,6 @@ func (kcp *KCP) flush(ackOnly bool) {
 		kcp.snd_buf = append(kcp.snd_buf, newseg)
 		kcp.snd_nxt++
 		newSegsCount++
-		kcp.snd_queue[k].data = nil
 	}
 	if newSegsCount > 0 {
 		kcp.snd_queue = kcp.remove_front(kcp.snd_queue, newSegsCount)
@@ -738,9 +739,15 @@ func (kcp *KCP) flush(ackOnly bool) {
 	// check for retransmissions
 	current := currentMs()
 	var change, lost, lostSegs, fastRetransSegs, earlyRetransSegs uint64
-	for k := range kcp.snd_buf {
-		segment := &kcp.snd_buf[k]
+	minrto := int32(kcp.interval)
+
+	ref := kcp.snd_buf[:len(kcp.snd_buf)] // for bounds check elimination
+	for k := range ref {
+		segment := &ref[k]
 		needsend := false
+		if segment.acked == 1 {
+			continue
+		}
 		if segment.xmit == 0 { // initial transmit
 			needsend = true
 			segment.rto = kcp.rx_rto
@@ -772,6 +779,7 @@ func (kcp *KCP) flush(ackOnly bool) {
 		}
 
 		if needsend {
+			current = currentMs() // time update for a blocking call
 			segment.xmit++
 			segment.ts = current
 			segment.wnd = seg.wnd
@@ -782,7 +790,6 @@ func (kcp *KCP) flush(ackOnly bool) {
 
 			if size+need > int(kcp.mtu) {
 				kcp.output(buffer, size)
-				current = currentMs() // time update for a blocking call
 				ptr = buffer
 			}
 
@@ -794,6 +801,11 @@ func (kcp *KCP) flush(ackOnly bool) {
 				kcp.state = 0xFFFFFFFF
 			}
 		}
+
+		// get the nearest rto
+		if rto := _itimediff(segment.resendts, current); rto > 0 && rto < minrto {
+			minrto = rto
+		}
 	}
 
 	// flush the remaining segments
@@ -819,32 +831,37 @@ func (kcp *KCP) flush(ackOnly bool) {
 		atomic.AddUint64(&DefaultSnmp.RetransSegs, sum)
 	}
 
-	// update ssthresh
-	// rate halving, https://tools.ietf.org/html/rfc6937
-	if change > 0 {
-		inflight := kcp.snd_nxt - kcp.snd_una
-		kcp.ssthresh = inflight / 2
-		if kcp.ssthresh < IKCP_THRESH_MIN {
-			kcp.ssthresh = IKCP_THRESH_MIN
+	// cwnd update
+	if kcp.nocwnd == 0 {
+		// update ssthresh
+		// rate halving, https://tools.ietf.org/html/rfc6937
+		if change > 0 {
+			inflight := kcp.snd_nxt - kcp.snd_una
+			kcp.ssthresh = inflight / 2
+			if kcp.ssthresh < IKCP_THRESH_MIN {
+				kcp.ssthresh = IKCP_THRESH_MIN
+			}
+			kcp.cwnd = kcp.ssthresh + resent
+			kcp.incr = kcp.cwnd * kcp.mss
 		}
-		kcp.cwnd = kcp.ssthresh + resent
-		kcp.incr = kcp.cwnd * kcp.mss
-	}
 
-	// congestion control, https://tools.ietf.org/html/rfc5681
-	if lost > 0 {
-		kcp.ssthresh = cwnd / 2
-		if kcp.ssthresh < IKCP_THRESH_MIN {
-			kcp.ssthresh = IKCP_THRESH_MIN
+		// congestion control, https://tools.ietf.org/html/rfc5681
+		if lost > 0 {
+			kcp.ssthresh = cwnd / 2
+			if kcp.ssthresh < IKCP_THRESH_MIN {
+				kcp.ssthresh = IKCP_THRESH_MIN
+			}
+			kcp.cwnd = 1
+			kcp.incr = kcp.mss
 		}
-		kcp.cwnd = 1
-		kcp.incr = kcp.mss
-	}
 
-	if kcp.cwnd < 1 {
-		kcp.cwnd = 1
-		kcp.incr = kcp.mss
+		if kcp.cwnd < 1 {
+			kcp.cwnd = 1
+			kcp.incr = kcp.mss
+		}
 	}
+
+	return uint32(minrto)
 }
 
 // Update updates state (call it repeatedly, every 10ms-100ms), or you can ask
@@ -991,8 +1008,5 @@ func (kcp *KCP) WaitSnd() int {
 // remove front n elements from queue
 func (kcp *KCP) remove_front(q []segment, n int) []segment {
 	newn := copy(q, q[n:])
-	for i := newn; i < len(q); i++ {
-		q[i] = segment{} // manual set nil for GC
-	}
 	return q[:newn]
 }
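The `parse_data` hunk above defers the payload copy until a segment is known to be new, drawing the destination buffer from the shared `xmitBuf` pool. A minimal, self-contained sketch of that pool pattern follows; the `mtuLimit` value and the pool's `New` function here are illustrative stand-ins, not the package's exact setup.

```go
package main

import (
	"fmt"
	"sync"
)

const mtuLimit = 1500 // stand-in for the package's MTU limit constant

// xmitBuf hands out fixed-capacity byte slices so per-packet
// allocations are amortized across a session's lifetime.
var xmitBuf = sync.Pool{
	New: func() interface{} { return make([]byte, mtuLimit) },
}

func main() {
	incoming := []byte("payload bytes read from the UDP socket")

	// replicate the content only once the segment is known to be new
	buf := xmitBuf.Get().([]byte)
	dataCopy := buf[:len(incoming)]
	copy(dataCopy, incoming)

	fmt.Println(string(dataCopy))

	// once the segment has been consumed, the buffer goes back to the pool
	xmitBuf.Put(buf)
}
```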

+ 239 - 229
vendor/github.com/fatedier/kcp-go/sess.go

@@ -4,7 +4,6 @@ import (
 	"crypto/rand"
 	"encoding/binary"
 	"hash/crc32"
-	"io"
 	"net"
 	"sync"
 	"sync/atomic"
@@ -12,6 +11,7 @@ import (
 
 	"github.com/pkg/errors"
 	"golang.org/x/net/ipv4"
+	"golang.org/x/net/ipv6"
 )
 
 type errTimeout struct {
@@ -23,7 +23,7 @@ func (errTimeout) Temporary() bool { return true }
 func (errTimeout) Error() string   { return "i/o timeout" }
 
 const (
-	// 16-bytes magic number for each packet
+	// 16-byte nonce for each packet
 	nonceSize = 16
 
 	// 4-bytes packet checksum
@@ -40,9 +40,6 @@ const (
 
 	// accept backlog
 	acceptBacklog = 128
-
-	// prerouting(to session) queue
-	qlen = 128
 )
 
 const (
@@ -51,8 +48,8 @@ const (
 )
 
 var (
-	// global packet buffer
-	// shared among sending/receiving/FEC
+	// a system-wide packet buffer shared among sending, receiving and FEC
+	// to mitigate high-frequency memory allocation for packets
 	xmitBuf sync.Pool
 )
 
@@ -68,17 +65,17 @@ type (
 		updaterIdx int            // record slice index in updater
 		conn       net.PacketConn // the underlying packet connection
 		kcp        *KCP           // KCP ARQ protocol
-		l          *Listener      // point to the Listener if it's accepted by Listener
-		block      BlockCrypt     // block encryption
+		l          *Listener      // pointing to the Listener object if it's been accepted by a Listener
+		block      BlockCrypt     // block encryption object
 
 		// kcp receiving is based on packets
 		// recvbuf turns packets into stream
 		recvbuf []byte
 		bufptr  []byte
-		// extended output buffer(with header)
+		// extended output buffer for the prepended header, if one is present
 		ext []byte
 
-		// FEC
+		// FEC codec
 		fecDecoder *fecDecoder
 		fecEncoder *fecEncoder
 
@@ -86,16 +83,20 @@ type (
 		remote     net.Addr  // remote peer address
 		rd         time.Time // read deadline
 		wd         time.Time // write deadline
-		headerSize int       // the overall header size added before KCP frame
-		ackNoDelay bool      // send ack immediately for each incoming packet
+		headerSize int       // the additional header size prepended to a KCP frame
+		ackNoDelay bool      // send an ack immediately for each incoming packet (testing purpose)
 		writeDelay bool      // delay kcp.flush() for Write() for bulk transfer
-		dup        int       // duplicate udp packets
+		dup        int       // duplicate udp packets (testing purpose)
 
 		// notifications
-		die          chan struct{} // notify session has Closed
+		die          chan struct{} // notify current session has Closed
 		chReadEvent  chan struct{} // notify Read() can be called without blocking
 		chWriteEvent chan struct{} // notify Write() can be called without blocking
-		chErrorEvent chan error    // notify Read() have an error
+		chReadError  chan error    // notify that PacketConn.Read() has returned an error
+		chWriteError chan error    // notify that PacketConn.Write() has returned an error
+
+		// nonce generator
+		nonce Entropy
 
 		isClosed bool // flag the session has Closed
 		mu       sync.Mutex
@@ -114,16 +115,19 @@ type (
 func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
 	sess := new(UDPSession)
 	sess.die = make(chan struct{})
+	sess.nonce = new(nonceAES128)
+	sess.nonce.Init()
 	sess.chReadEvent = make(chan struct{}, 1)
 	sess.chWriteEvent = make(chan struct{}, 1)
-	sess.chErrorEvent = make(chan error, 1)
+	sess.chReadError = make(chan error, 1)
+	sess.chWriteError = make(chan error, 1)
 	sess.remote = remote
 	sess.conn = conn
 	sess.l = l
 	sess.block = block
 	sess.recvbuf = make([]byte, mtuLimit)
 
-	// FEC initialization
+	// FEC codec initialization
 	sess.fecDecoder = newFECDecoder(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
 	if sess.block != nil {
 		sess.fecEncoder = newFECEncoder(dataShards, parityShards, cryptHeaderSize)
@@ -131,7 +135,7 @@ func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn
 		sess.fecEncoder = newFECEncoder(dataShards, parityShards, 0)
 	}
 
-	// calculate header size
+	// calculate additional header size introduced by FEC and encryption
 	if sess.block != nil {
 		sess.headerSize += cryptHeaderSize
 	}
@@ -139,8 +143,7 @@ func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn
 		sess.headerSize += fecHeaderSizePlus2
 	}
 
-	// only allocate extended packet buffer
-	// when the extra header is required
+	// we only need to allocate extended packet buffer if we have the additional header
 	if sess.headerSize > 0 {
 		sess.ext = make([]byte, mtuLimit)
 	}
@@ -152,8 +155,8 @@ func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn
 	})
 	sess.kcp.SetMtu(IKCP_MTU_DEF - sess.headerSize)
 
-	// add current session to the global updater,
-	// which periodically calls sess.update()
+	// register current session to the global updater,
+	// which calls sess.update() periodically.
 	updater.addSession(sess)
 
 	if sess.l == nil { // it's a client connection
@@ -179,6 +182,7 @@ func (s *UDPSession) Read(b []byte) (n int, err error) {
 			n = copy(b, s.bufptr)
 			s.bufptr = s.bufptr[n:]
 			s.mu.Unlock()
+			atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(n))
 			return n, nil
 		}
 
@@ -188,29 +192,29 @@ func (s *UDPSession) Read(b []byte) (n int, err error) {
 		}
 
 		if size := s.kcp.PeekSize(); size > 0 { // peek data size from kcp
-			atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(size))
-			if len(b) >= size { // direct write to b
+			if len(b) >= size { // receive data into 'b' directly
 				s.kcp.Recv(b)
 				s.mu.Unlock()
+				atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(size))
 				return size, nil
 			}
 
-			// resize kcp receive buffer
-			// to make sure recvbuf has enough capacity
+			// if necessary, resize the stream buffer to guarantee sufficient buffer space
 			if cap(s.recvbuf) < size {
 				s.recvbuf = make([]byte, size)
 			}
 
-			// resize recvbuf slice length
+			// resize the length of recvbuf to correspond to data size
 			s.recvbuf = s.recvbuf[:size]
 			s.kcp.Recv(s.recvbuf)
-			n = copy(b, s.recvbuf)   // copy to b
-			s.bufptr = s.recvbuf[n:] // update pointer
+			n = copy(b, s.recvbuf)   // copy to 'b'
+			s.bufptr = s.recvbuf[n:] // pointer update
 			s.mu.Unlock()
+			atomic.AddUint64(&DefaultSnmp.BytesReceived, uint64(n))
 			return n, nil
 		}
 
-		// read deadline
+		// deadline for current reading operation
 		var timeout *time.Timer
 		var c <-chan time.Time
 		if !s.rd.IsZero() {
@@ -230,7 +234,7 @@ func (s *UDPSession) Read(b []byte) (n int, err error) {
 		case <-s.chReadEvent:
 		case <-c:
 		case <-s.die:
-		case err = <-s.chErrorEvent:
+		case err = <-s.chReadError:
 			if timeout != nil {
 				timeout.Stop()
 			}
@@ -252,7 +256,8 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
 			return 0, errors.New(errBrokenPipe)
 		}
 
-		// api flow control
+		// control how much data will be sent to the kcp core
+		// to prevent memory exhaustion
 		if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
 			n = len(b)
 			for {
@@ -265,7 +270,8 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
 				}
 			}
 
-			if !s.writeDelay {
+			// flush immediately if the queue is full
+			if s.kcp.WaitSnd() >= int(s.kcp.snd_wnd) || !s.writeDelay {
 				s.kcp.flush(false)
 			}
 			s.mu.Unlock()
@@ -273,7 +279,7 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
 			return n, nil
 		}
 
-		// write deadline
+		// deadline for current writing operation
 		var timeout *time.Timer
 		var c <-chan time.Time
 		if !s.wd.IsZero() {
@@ -292,6 +298,11 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
 		case <-s.chWriteEvent:
 		case <-c:
 		case <-s.die:
+		case err = <-s.chWriteError:
+			if timeout != nil {
+				timeout.Stop()
+			}
+			return n, err
 		}
 
 		if timeout != nil {
@@ -302,13 +313,10 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
 
 // Close closes the connection.
 func (s *UDPSession) Close() error {
-	// remove this session from updater & listener(if necessary)
+	// remove current session from updater & listener(if necessary)
 	updater.removeSession(s)
 	if s.l != nil { // notify listener
-		s.l.closeSession(sessionKey{
-			addr:   s.remote.String(),
-			convID: s.kcp.conv,
-		})
+		s.l.closeSession(s.remote)
 	}
 
 	s.mu.Lock()
@@ -337,6 +345,8 @@ func (s *UDPSession) SetDeadline(t time.Time) error {
 	defer s.mu.Unlock()
 	s.rd = t
 	s.wd = t
+	s.notifyReadEvent()
+	s.notifyWriteEvent()
 	return nil
 }
 
@@ -345,6 +355,7 @@ func (s *UDPSession) SetReadDeadline(t time.Time) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	s.rd = t
+	s.notifyReadEvent()
 	return nil
 }
 
@@ -353,6 +364,7 @@ func (s *UDPSession) SetWriteDeadline(t time.Time) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	s.wd = t
+	s.notifyWriteEvent()
 	return nil
 }
 
@@ -420,10 +432,11 @@ func (s *UDPSession) SetDSCP(dscp int) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 	if s.l == nil {
-		if nc, ok := s.conn.(*connectedUDPConn); ok {
-			return ipv4.NewConn(nc.UDPConn).SetTOS(dscp << 2)
-		} else if nc, ok := s.conn.(net.Conn); ok {
-			return ipv4.NewConn(nc).SetTOS(dscp << 2)
+		if nc, ok := s.conn.(net.Conn); ok {
+			if err := ipv4.NewConn(nc).SetTOS(dscp << 2); err != nil {
+				return ipv6.NewConn(nc).SetTrafficClass(dscp)
+			}
+			return nil
 		}
 	}
 	return errors.New(errInvalidOperation)
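DSCP occupies the upper six bits of the legacy IPv4 TOS byte, which is why the IPv4 path above shifts the value left by two before calling SetTOS, while the IPv6 path hands it straight to SetTrafficClass. A toy check of that mapping (the DSCP value is chosen purely for illustration):

```go
package main

import "fmt"

func main() {
	dscp := 46 // e.g. Expedited Forwarding, used here only as an example
	fmt.Printf("DSCP %d -> IPv4 TOS byte 0x%02X\n", dscp, dscp<<2) // 0xB8
}
```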
@@ -453,11 +466,11 @@ func (s *UDPSession) SetWriteBuffer(bytes int) error {
 	return errors.New(errInvalidOperation)
 }
 
-// output pipeline entry
-// steps for output data processing:
-// 0. Header extends
-// 1. FEC
-// 2. CRC32
+// post-processing for sending a packet from kcp core
+// steps:
+// 0. Header extension
+// 1. FEC parity shard generation
+// 2. CRC32 integrity checksum
 // 3. Encryption
 // 4. WriteTo kernel
 func (s *UDPSession) output(buf []byte) {
@@ -477,13 +490,13 @@ func (s *UDPSession) output(buf []byte) {
 
 	// 2&3. crc32 & encryption
 	if s.block != nil {
-		io.ReadFull(rand.Reader, ext[:nonceSize])
+		s.nonce.Fill(ext[:nonceSize])
 		checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
 		binary.LittleEndian.PutUint32(ext[nonceSize:], checksum)
 		s.block.Encrypt(ext, ext)
 
 		for k := range ecc {
-			io.ReadFull(rand.Reader, ecc[k][:nonceSize])
+			s.nonce.Fill(ecc[k][:nonceSize])
 			checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
 			binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
 			s.block.Encrypt(ecc[k], ecc[k])
@@ -497,6 +510,8 @@ func (s *UDPSession) output(buf []byte) {
 		if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
 			nbytes += n
 			npkts++
+		} else {
+			s.notifyWriteError(err)
 		}
 	}
 
@@ -504,6 +519,8 @@ func (s *UDPSession) output(buf []byte) {
 		if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
 			nbytes += n
 			npkts++
+		} else {
+			s.notifyWriteError(err)
 		}
 	}
 	atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
@@ -513,11 +530,11 @@ func (s *UDPSession) output(buf []byte) {
 // kcp update, returns interval for next calling
 func (s *UDPSession) update() (interval time.Duration) {
 	s.mu.Lock()
-	s.kcp.flush(false)
-	if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
+	waitsnd := s.kcp.WaitSnd()
+	interval = time.Duration(s.kcp.flush(false)) * time.Millisecond
+	if s.kcp.WaitSnd() < waitsnd {
 		s.notifyWriteEvent()
 	}
-	interval = time.Duration(s.kcp.interval) * time.Millisecond
 	s.mu.Unlock()
 	return
 }
@@ -539,56 +556,77 @@ func (s *UDPSession) notifyWriteEvent() {
 	}
 }
 
+func (s *UDPSession) notifyWriteError(err error) {
+	select {
+	case s.chWriteError <- err:
+	default:
+	}
+}
+
 func (s *UDPSession) kcpInput(data []byte) {
 	var kcpInErrors, fecErrs, fecRecovered, fecParityShards uint64
 
 	if s.fecDecoder != nil {
-		f := s.fecDecoder.decodeBytes(data)
-		s.mu.Lock()
-		if f.flag == typeData {
-			if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true, s.ackNoDelay); ret != 0 {
-				kcpInErrors++
-			}
-		}
+		if len(data) > fecHeaderSize { // must be larger than fec header size
+			f := s.fecDecoder.decodeBytes(data)
+			if f.flag == typeData || f.flag == typeFEC { // header check
+				if f.flag == typeFEC {
+					fecParityShards++
+				}
+				recovers := s.fecDecoder.decode(f)
 
-		if f.flag == typeData || f.flag == typeFEC {
-			if f.flag == typeFEC {
-				fecParityShards++
-			}
+				s.mu.Lock()
+				waitsnd := s.kcp.WaitSnd()
+				if f.flag == typeData {
+					if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true, s.ackNoDelay); ret != 0 {
+						kcpInErrors++
+					}
+				}
 
-			recovers := s.fecDecoder.decode(f)
-			for _, r := range recovers {
-				if len(r) >= 2 { // must be larger than 2bytes
-					sz := binary.LittleEndian.Uint16(r)
-					if int(sz) <= len(r) && sz >= 2 {
-						if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
-							fecRecovered++
+				for _, r := range recovers {
+					if len(r) >= 2 { // must be larger than 2bytes
+						sz := binary.LittleEndian.Uint16(r)
+						if int(sz) <= len(r) && sz >= 2 {
+							if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
+								fecRecovered++
+							} else {
+								kcpInErrors++
+							}
 						} else {
-							kcpInErrors++
+							fecErrs++
 						}
 					} else {
 						fecErrs++
 					}
-				} else {
-					fecErrs++
 				}
-			}
-		}
 
-		// notify reader
-		if n := s.kcp.PeekSize(); n > 0 {
-			s.notifyReadEvent()
+				// notify the readers that data is available
+				if n := s.kcp.PeekSize(); n > 0 {
+					s.notifyReadEvent()
+				}
+				// notify the writers when the send queue shrinks (e.g. after ACKs)
+				if s.kcp.WaitSnd() < waitsnd {
+					s.notifyWriteEvent()
+				}
+				s.mu.Unlock()
+			} else {
+				atomic.AddUint64(&DefaultSnmp.InErrs, 1)
+			}
+		} else {
+			atomic.AddUint64(&DefaultSnmp.InErrs, 1)
 		}
-		s.mu.Unlock()
 	} else {
 		s.mu.Lock()
+		waitsnd := s.kcp.WaitSnd()
 		if ret := s.kcp.Input(data, true, s.ackNoDelay); ret != 0 {
 			kcpInErrors++
 		}
-		// notify reader
 		if n := s.kcp.PeekSize(); n > 0 {
 			s.notifyReadEvent()
 		}
+		if s.kcp.WaitSnd() < waitsnd {
+			s.notifyWriteEvent()
+		}
 		s.mu.Unlock()
 	}
 
@@ -608,65 +646,52 @@ func (s *UDPSession) kcpInput(data []byte) {
 	}
 }
 
-func (s *UDPSession) receiver(ch chan<- []byte) {
+// the read loop for a client session
+func (s *UDPSession) readLoop() {
+	buf := make([]byte, mtuLimit)
+	var src string
 	for {
-		data := xmitBuf.Get().([]byte)[:mtuLimit]
-		if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD {
-			select {
-			case ch <- data[:n]:
-			case <-s.die:
-				return
+		if n, addr, err := s.conn.ReadFrom(buf); err == nil {
+			// make sure the packet is from the same source
+			if src == "" { // set source address
+				src = addr.String()
+			} else if addr.String() != src {
+				atomic.AddUint64(&DefaultSnmp.InErrs, 1)
+				continue
 			}
-		} else if err != nil {
-			s.chErrorEvent <- err
-			return
-		} else {
-			atomic.AddUint64(&DefaultSnmp.InErrs, 1)
-		}
-	}
-}
 
-// read loop for client session
-func (s *UDPSession) readLoop() {
-	chPacket := make(chan []byte, qlen)
-	go s.receiver(chPacket)
-
-	for {
-		select {
-		case data := <-chPacket:
-			raw := data
-			dataValid := false
-			if s.block != nil {
-				s.block.Decrypt(data, data)
-				data = data[nonceSize:]
-				checksum := crc32.ChecksumIEEE(data[crcSize:])
-				if checksum == binary.LittleEndian.Uint32(data) {
-					data = data[crcSize:]
+			if n >= s.headerSize+IKCP_OVERHEAD {
+				data := buf[:n]
+				dataValid := false
+				if s.block != nil {
+					s.block.Decrypt(data, data)
+					data = data[nonceSize:]
+					checksum := crc32.ChecksumIEEE(data[crcSize:])
+					if checksum == binary.LittleEndian.Uint32(data) {
+						data = data[crcSize:]
+						dataValid = true
+					} else {
+						atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
+					}
+				} else if s.block == nil {
 					dataValid = true
-				} else {
-					atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
 				}
-			} else if s.block == nil {
-				dataValid = true
-			}
 
-			if dataValid {
-				s.kcpInput(data)
+				if dataValid {
+					s.kcpInput(data)
+				}
+			} else {
+				atomic.AddUint64(&DefaultSnmp.InErrs, 1)
 			}
-			xmitBuf.Put(raw)
-		case <-s.die:
+		} else {
+			s.chReadError <- err
 			return
 		}
 	}
 }
 
 type (
-	sessionKey struct {
-		addr   string
-		convID uint32
-	}
-
-	// Listener defines a server listening for connections
+	// Listener defines a server waiting to accept incoming connections
 	Listener struct {
 		block        BlockCrypt     // block encryption
 		dataShards   int            // FEC data shard
@@ -674,120 +699,93 @@ type (
 		fecDecoder   *fecDecoder    // FEC mock initialization
 		conn         net.PacketConn // the underlying packet connection
 
-		sessions        map[sessionKey]*UDPSession // all sessions accepted by this Listener
-		chAccepts       chan *UDPSession           // Listen() backlog
-		chSessionClosed chan sessionKey            // session close queue
-		headerSize      int                        // the overall header size added before KCP frame
-		die             chan struct{}              // notify the listener has closed
-		rd              atomic.Value               // read deadline for Accept()
+		sessions        map[string]*UDPSession // all sessions accepted by this Listener
+		sessionLock     sync.Mutex
+		chAccepts       chan *UDPSession // Listen() backlog
+		chSessionClosed chan net.Addr    // session close queue
+		headerSize      int              // the additional header to a KCP frame
+		die             chan struct{}    // notify the listener has closed
+		rd              atomic.Value     // read deadline for Accept()
 		wd              atomic.Value
 	}
-
-	// incoming packet
-	inPacket struct {
-		from net.Addr
-		data []byte
-	}
 )
 
 // monitor incoming data for all connections of server
 func (l *Listener) monitor() {
-	// cache last session
-	var lastKey sessionKey
+	// cache the most recently used session object
+	var lastAddr string
 	var lastSession *UDPSession
-
-	chPacket := make(chan inPacket, qlen)
-	go l.receiver(chPacket)
+	buf := make([]byte, mtuLimit)
 	for {
-		select {
-		case p := <-chPacket:
-			raw := p.data
-			data := p.data
-			from := p.from
-			dataValid := false
-			if l.block != nil {
-				l.block.Decrypt(data, data)
-				data = data[nonceSize:]
-				checksum := crc32.ChecksumIEEE(data[crcSize:])
-				if checksum == binary.LittleEndian.Uint32(data) {
-					data = data[crcSize:]
-					dataValid = true
-				} else {
-					atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
-				}
-			} else if l.block == nil {
-				dataValid = true
-			}
-
-			if dataValid {
-				var conv uint32
-				convValid := false
-				if l.fecDecoder != nil {
-					isfec := binary.LittleEndian.Uint16(data[4:])
-					if isfec == typeData {
-						conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
-						convValid = true
+		if n, from, err := l.conn.ReadFrom(buf); err == nil {
+			if n >= l.headerSize+IKCP_OVERHEAD {
+				data := buf[:n]
+				dataValid := false
+				if l.block != nil {
+					l.block.Decrypt(data, data)
+					data = data[nonceSize:]
+					checksum := crc32.ChecksumIEEE(data[crcSize:])
+					if checksum == binary.LittleEndian.Uint32(data) {
+						data = data[crcSize:]
+						dataValid = true
+					} else {
+						atomic.AddUint64(&DefaultSnmp.InCsumErrors, 1)
 					}
-				} else {
-					conv = binary.LittleEndian.Uint32(data)
-					convValid = true
+				} else if l.block == nil {
+					dataValid = true
 				}
 
-				if convValid {
-					key := sessionKey{
-						addr:   from.String(),
-						convID: conv,
-					}
+				if dataValid {
+					addr := from.String()
 					var s *UDPSession
 					var ok bool
 
-					// packets received from an address always come in batch.
+					// the packets received from an address always come in batches,
 					// cache the session for next packet, without querying map.
-					if key == lastKey {
+					if addr == lastAddr {
 						s, ok = lastSession, true
-					} else if s, ok = l.sessions[key]; ok {
-						lastSession = s
-						lastKey = key
+					} else {
+						l.sessionLock.Lock()
+						if s, ok = l.sessions[addr]; ok {
+							lastSession = s
+							lastAddr = addr
+						}
+						l.sessionLock.Unlock()
 					}
 
 					if !ok { // new session
-						if len(l.chAccepts) < cap(l.chAccepts) && len(l.sessions) < 4096 { // do not let new session overwhelm accept queue and connection count
-							s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block)
-							s.kcpInput(data)
-							l.sessions[key] = s
-							l.chAccepts <- s
+						if len(l.chAccepts) < cap(l.chAccepts) { // do not let the new sessions overwhelm accept queue
+							var conv uint32
+							convValid := false
+							if l.fecDecoder != nil {
+								isfec := binary.LittleEndian.Uint16(data[4:])
+								if isfec == typeData {
+									conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
+									convValid = true
+								}
+							} else {
+								conv = binary.LittleEndian.Uint32(data)
+								convValid = true
+							}
+
+							if convValid { // creates a new session only if the 'conv' field in kcp is accessible
+								s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block)
+								s.kcpInput(data)
+								l.sessionLock.Lock()
+								l.sessions[addr] = s
+								l.sessionLock.Unlock()
+								l.chAccepts <- s
+							}
 						}
 					} else {
 						s.kcpInput(data)
 					}
 				}
+			} else {
+				atomic.AddUint64(&DefaultSnmp.InErrs, 1)
 			}
-
-			xmitBuf.Put(raw)
-		case key := <-l.chSessionClosed:
-			if key == lastKey {
-				lastKey = sessionKey{}
-			}
-			delete(l.sessions, key)
-		case <-l.die:
-			return
-		}
-	}
-}
-
-func (l *Listener) receiver(ch chan<- inPacket) {
-	for {
-		data := xmitBuf.Get().([]byte)[:mtuLimit]
-		if n, from, err := l.conn.ReadFrom(data); err == nil && n >= l.headerSize+IKCP_OVERHEAD {
-			select {
-			case ch <- inPacket{from, data[:n]}:
-			case <-l.die:
-				return
-			}
-		} else if err != nil {
-			return
 		} else {
-			atomic.AddUint64(&DefaultSnmp.InErrs, 1)
+			return
 		}
 	}
 }
@@ -811,7 +809,10 @@ func (l *Listener) SetWriteBuffer(bytes int) error {
 // SetDSCP sets the 6bit DSCP field of IP header
 func (l *Listener) SetDSCP(dscp int) error {
 	if nc, ok := l.conn.(net.Conn); ok {
-		return ipv4.NewConn(nc).SetTOS(dscp << 2)
+		if err := ipv4.NewConn(nc).SetTOS(dscp << 2); err != nil {
+			return ipv6.NewConn(nc).SetTrafficClass(dscp)
+		}
+		return nil
 	}
 	return errors.New(errInvalidOperation)
 }
@@ -864,13 +865,14 @@ func (l *Listener) Close() error {
 }
 
 // closeSession notifies the listener that a session has closed
-func (l *Listener) closeSession(key sessionKey) bool {
-	select {
-	case l.chSessionClosed <- key:
+func (l *Listener) closeSession(remote net.Addr) (ret bool) {
+	l.sessionLock.Lock()
+	defer l.sessionLock.Unlock()
+	if _, ok := l.sessions[remote.String()]; ok {
+		delete(l.sessions, remote.String())
 		return true
-	case <-l.die:
-		return false
 	}
+	return false
 }
 
 // Addr returns the listener's network address. The Addr returned is shared by all invocations of Addr, so do not modify it.
@@ -898,9 +900,9 @@ func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards
 func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*Listener, error) {
 	l := new(Listener)
 	l.conn = conn
-	l.sessions = make(map[sessionKey]*UDPSession)
+	l.sessions = make(map[string]*UDPSession)
 	l.chAccepts = make(chan *UDPSession, acceptBacklog)
-	l.chSessionClosed = make(chan sessionKey)
+	l.chSessionClosed = make(chan net.Addr)
 	l.die = make(chan struct{})
 	l.dataShards = dataShards
 	l.parityShards = parityShards
@@ -924,17 +926,22 @@ func Dial(raddr string) (net.Conn, error) { return DialWithOptions(raddr, nil, 0
 
 // DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
 func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
+	// network type detection
 	udpaddr, err := net.ResolveUDPAddr("udp", raddr)
 	if err != nil {
 		return nil, errors.Wrap(err, "net.ResolveUDPAddr")
 	}
+	network := "udp4"
+	if udpaddr.IP.To4() == nil {
+		network = "udp"
+	}
 
-	udpconn, err := net.DialUDP("udp", nil, udpaddr)
+	conn, err := net.ListenUDP(network, nil)
 	if err != nil {
 		return nil, errors.Wrap(err, "net.DialUDP")
 	}
 
-	return NewConn(raddr, block, dataShards, parityShards, &connectedUDPConn{udpconn})
+	return NewConn(raddr, block, dataShards, parityShards, conn)
 }
 
 // NewConn establishes a session and talks KCP protocol over a packet connection.
@@ -949,6 +956,12 @@ func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn
 	return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil
 }
 
+// monotonic reference time point
+var refTime time.Time = time.Now()
+
+// currentMs returns the elapsed monotonic milliseconds since program startup
+func currentMs() uint32 { return uint32(time.Now().Sub(refTime) / time.Millisecond) }
+
 func NewConnEx(convid uint32, connected bool, raddr string, block BlockCrypt, dataShards, parityShards int, conn *net.UDPConn) (*UDPSession, error) {
 	udpaddr, err := net.ResolveUDPAddr("udp", raddr)
 	if err != nil {
@@ -963,9 +976,6 @@ func NewConnEx(convid uint32, connected bool, raddr string, block BlockCrypt, da
 	return newUDPSession(convid, dataShards, parityShards, nil, pConn, udpaddr, block), nil
 }
 
-// returns current time in milliseconds
-func currentMs() uint32 { return uint32(time.Now().UnixNano() / int64(time.Millisecond)) }
-
 // connectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
 // to Write syscalls that are 4 times faster on some OS'es. This should only be
 // used for connections that were produced by a net.Dial* call.
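For orientation, a minimal end-to-end use of the session API changed in this file might look like the sketch below: no encryption, no FEC shards, and error handling mostly elided, so treat it as illustrative rather than canonical.

```go
package main

import (
	"fmt"

	kcp "github.com/fatedier/kcp-go"
)

func main() {
	// server side: accept one session and echo what it reads
	l, err := kcp.ListenWithOptions("127.0.0.1:12345", nil, 0, 0)
	if err != nil {
		panic(err)
	}
	go func() {
		s, _ := l.Accept()
		buf := make([]byte, 1024)
		n, _ := s.Read(buf)
		s.Write(buf[:n])
	}()

	// client side: DialWithOptions now binds an unconnected UDP socket
	// (net.ListenUDP) and talks to the resolved remote address
	c, err := kcp.DialWithOptions("127.0.0.1:12345", nil, 0, 0)
	if err != nil {
		panic(err)
	}
	c.Write([]byte("ping"))
	buf := make([]byte, 1024)
	n, _ := c.Read(buf)
	fmt.Println(string(buf[:n]))
}
```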

+ 6 - 7
vendor/github.com/fatedier/kcp-go/updater.go

@@ -85,20 +85,19 @@ func (h *updateHeap) updateTask() {
 
 		h.mu.Lock()
 		hlen := h.Len()
-		now := time.Now()
 		for i := 0; i < hlen; i++ {
-			entry := heap.Pop(h).(entry)
-			if now.After(entry.ts) {
-				entry.ts = now.Add(entry.s.update())
-				heap.Push(h, entry)
+			entry := &h.entries[0]
+			if time.Now().After(entry.ts) {
+				interval := entry.s.update()
+				entry.ts = time.Now().Add(interval)
+				heap.Fix(h, 0)
 			} else {
-				heap.Push(h, entry)
 				break
 			}
 		}
 
 		if hlen > 0 {
-			timer = time.After(h.entries[0].ts.Sub(now))
+			timer = time.After(h.entries[0].ts.Sub(time.Now()))
 		}
 		h.mu.Unlock()
 	}
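The updater change above swaps the pop-then-push pattern for an in-place update of the heap root followed by heap.Fix, saving one re-insertion per due entry. The same pattern on a plain integer min-heap (the types here are illustrative, not the updater's own):

```go
package main

import (
	"container/heap"
	"fmt"
)

// intHeap is a minimal min-heap of ints, as in the container/heap docs.
type intHeap []int

func (h intHeap) Len() int            { return len(h) }
func (h intHeap) Less(i, j int) bool  { return h[i] < h[j] }
func (h intHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *intHeap) Push(x interface{}) { *h = append(*h, x.(int)) }
func (h *intHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[:n-1]
	return x
}

func main() {
	h := &intHeap{5, 2, 9}
	heap.Init(h)

	// reschedule the smallest element in place, then restore heap order;
	// this mirrors updating entry.ts and calling heap.Fix(h, 0)
	(*h)[0] += 10
	heap.Fix(h, 0)

	fmt.Println((*h)[0]) // next-due element after the in-place update
}
```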

+ 0 - 110
vendor/github.com/fatedier/kcp-go/xor.go

@@ -1,110 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package kcp
-
-import (
-	"runtime"
-	"unsafe"
-)
-
-const wordSize = int(unsafe.Sizeof(uintptr(0)))
-const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
-
-// fastXORBytes xors in bulk. It only works on architectures that
-// support unaligned read/writes.
-func fastXORBytes(dst, a, b []byte) int {
-	n := len(a)
-	if len(b) < n {
-		n = len(b)
-	}
-
-	w := n / wordSize
-	if w > 0 {
-		wordBytes := w * wordSize
-		fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
-	}
-
-	for i := (n - n%wordSize); i < n; i++ {
-		dst[i] = a[i] ^ b[i]
-	}
-
-	return n
-}
-
-func safeXORBytes(dst, a, b []byte) int {
-	n := len(a)
-	if len(b) < n {
-		n = len(b)
-	}
-	ex := n % 8
-	for i := 0; i < ex; i++ {
-		dst[i] = a[i] ^ b[i]
-	}
-
-	for i := ex; i < n; i += 8 {
-		_dst := dst[i : i+8]
-		_a := a[i : i+8]
-		_b := b[i : i+8]
-		_dst[0] = _a[0] ^ _b[0]
-		_dst[1] = _a[1] ^ _b[1]
-		_dst[2] = _a[2] ^ _b[2]
-		_dst[3] = _a[3] ^ _b[3]
-
-		_dst[4] = _a[4] ^ _b[4]
-		_dst[5] = _a[5] ^ _b[5]
-		_dst[6] = _a[6] ^ _b[6]
-		_dst[7] = _a[7] ^ _b[7]
-	}
-	return n
-}
-
-// xorBytes xors the bytes in a and b. The destination is assumed to have enough
-// space. Returns the number of bytes xor'd.
-func xorBytes(dst, a, b []byte) int {
-	if supportsUnaligned {
-		return fastXORBytes(dst, a, b)
-	}
-	// TODO(hanwen): if (dst, a, b) have common alignment
-	// we could still try fastXORBytes. It is not clear
-	// how often this happens, and it's only worth it if
-	// the block encryption itself is hardware
-	// accelerated.
-	return safeXORBytes(dst, a, b)
-}
-
-// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
-// The arguments are assumed to be of equal length.
-func fastXORWords(dst, a, b []byte) {
-	dw := *(*[]uintptr)(unsafe.Pointer(&dst))
-	aw := *(*[]uintptr)(unsafe.Pointer(&a))
-	bw := *(*[]uintptr)(unsafe.Pointer(&b))
-	n := len(b) / wordSize
-	ex := n % 8
-	for i := 0; i < ex; i++ {
-		dw[i] = aw[i] ^ bw[i]
-	}
-
-	for i := ex; i < n; i += 8 {
-		_dw := dw[i : i+8]
-		_aw := aw[i : i+8]
-		_bw := bw[i : i+8]
-		_dw[0] = _aw[0] ^ _bw[0]
-		_dw[1] = _aw[1] ^ _bw[1]
-		_dw[2] = _aw[2] ^ _bw[2]
-		_dw[3] = _aw[3] ^ _bw[3]
-		_dw[4] = _aw[4] ^ _bw[4]
-		_dw[5] = _aw[5] ^ _bw[5]
-		_dw[6] = _aw[6] ^ _bw[6]
-		_dw[7] = _aw[7] ^ _bw[7]
-	}
-}
-
-func xorWords(dst, a, b []byte) {
-	if supportsUnaligned {
-		fastXORWords(dst, a, b)
-	} else {
-		safeXORBytes(dst, a, b)
-	}
-}
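The deleted xor.go implemented bulk XOR with a word-at-a-time fast path for architectures that allow unaligned access. Stripped of the 8-way unrolling, its portable fallback reduces to the following sketch:

```go
package main

import "fmt"

// xorBytesPortable XORs a and b into dst byte by byte and returns the
// number of bytes processed (the length of the shorter input).
func xorBytesPortable(dst, a, b []byte) int {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		dst[i] = a[i] ^ b[i]
	}
	return n
}

func main() {
	dst := make([]byte, 4)
	xorBytesPortable(dst, []byte{0xFF, 0x00, 0xAA, 0x55}, []byte{0x0F, 0xF0, 0xAA, 0x55})
	fmt.Printf("% X\n", dst) // F0 F0 00 00
}
```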

+ 0 - 16
vendor/github.com/templexxx/reedsolomon/.gitignore → vendor/github.com/klauspost/cpuid/.gitignore

@@ -22,19 +22,3 @@ _testmain.go
 *.exe
 *.test
 *.prof
-/.idea
-/backup
-/loopunroll/
-cpu.out
-mathtool/galois/
-mathtool/matrix/
-mem.out
-/examples/
-/.DS_Store
-/mathtool/cntinverse
-/invert
-/bakcup
-/buf.svg
-*.svg
-*.out
-/escape

+ 23 - 0
vendor/github.com/klauspost/cpuid/.travis.yml

@@ -0,0 +1,23 @@
+language: go
+
+sudo: false
+
+os:
+  - linux
+  - osx  
+go:
+  - 1.8.x
+  - 1.9.x
+  - 1.10.x
+  - master
+
+script: 
+ - go vet ./...
+ - go test -v ./...
+ - go test -race ./...
+ - diff <(gofmt -d .) <("") 
+
+matrix:
+  allow_failures:
+    - go: 'master'
+  fast_finish: true 

+ 35 - 0
vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt

@@ -0,0 +1,35 @@
+Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2015- Klaus Post & Contributors.
+Email: klauspost@gmail.com
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+    have the right to submit it under the open source license
+    indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+    of my knowledge, is covered under an appropriate open source
+    license and I have the right under that license to submit that
+    work with modifications, whether created in whole or in part
+    by me, under the same open source license (unless I am
+    permitted to submit under a different license), as indicated
+    in the file; or
+
+(c) The contribution was provided directly to me by some other
+    person who certified (a), (b) or (c) and I have not modified
+    it.
+
+(d) I understand and agree that this project and the contribution
+    are public and that a record of the contribution (including all
+    personal information I submit with it, including my sign-off) is
+    maintained indefinitely and may be redistributed consistent with
+    this project or the open source license(s) involved.

+ 22 - 0
vendor/github.com/klauspost/cpuid/LICENSE

@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+

+ 145 - 0
vendor/github.com/klauspost/cpuid/README.md

@@ -0,0 +1,145 @@
+# cpuid
+Package cpuid provides information about the CPU running the current program.
+
+CPU features are detected on startup, and kept for fast access through the life of the application.
+Currently x86 / x64 (AMD64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
+
+You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+
+Package home: https://github.com/klauspost/cpuid
+
+[![GoDoc][1]][2] [![Build Status][3]][4]
+
+[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
+[2]: https://godoc.org/github.com/klauspost/cpuid
+[3]: https://travis-ci.org/klauspost/cpuid.svg
+[4]: https://travis-ci.org/klauspost/cpuid
+
+# features
+## CPU Instructions
+*  **CMOV** (i686 CMOV)
+*  **NX** (NX (No-Execute) bit)
+*  **AMD3DNOW** (AMD 3DNOW)
+*  **AMD3DNOWEXT** (AMD 3DNowExt)
+*  **MMX** (standard MMX)
+*  **MMXEXT** (SSE integer functions or AMD MMX ext)
+*  **SSE** (SSE functions)
+*  **SSE2** (P4 SSE functions)
+*  **SSE3** (Prescott SSE3 functions)
+*  **SSSE3** (Conroe SSSE3 functions)
+*  **SSE4** (Penryn SSE4.1 functions)
+*  **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
+*  **SSE42** (Nehalem SSE4.2 functions)
+*  **AVX** (AVX functions)
+*  **AVX2** (AVX2 functions)
+*  **FMA3** (Intel FMA 3)
+*  **FMA4** (Bulldozer FMA4 functions)
+*  **XOP** (Bulldozer XOP functions)
+*  **F16C** (Half-precision floating-point conversion)
+*  **BMI1** (Bit Manipulation Instruction Set 1)
+*  **BMI2** (Bit Manipulation Instruction Set 2)
+*  **TBM** (AMD Trailing Bit Manipulation)
+*  **LZCNT** (LZCNT instruction)
+*  **POPCNT** (POPCNT instruction)
+*  **AESNI** (Advanced Encryption Standard New Instructions)
+*  **CLMUL** (Carry-less Multiplication)
+*  **HTT** (Hyperthreading (enabled))
+*  **HLE** (Hardware Lock Elision)
+*  **RTM** (Restricted Transactional Memory)
+*  **RDRAND** (RDRAND instruction is available)
+*  **RDSEED** (RDSEED instruction is available)
+*  **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
+*  **SHA** (Intel SHA Extensions)
+*  **AVX512F** (AVX-512 Foundation)
+*  **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
+*  **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
+*  **AVX512PF** (AVX-512 Prefetch Instructions)
+*  **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
+*  **AVX512CD** (AVX-512 Conflict Detection Instructions)
+*  **AVX512BW** (AVX-512 Byte and Word Instructions)
+*  **AVX512VL** (AVX-512 Vector Length Extensions)
+*  **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
+*  **MPX** (Intel MPX (Memory Protection Extensions))
+*  **ERMS** (Enhanced REP MOVSB/STOSB)
+*  **RDTSCP** (RDTSCP Instruction)
+*  **CX16** (CMPXCHG16B Instruction)
+*  **SGX** (Software Guard Extensions, with activation details)
+
+## Performance
+*  **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
+*  **SSE2SLOW** (SSE2 is supported, but usually not faster)
+*  **SSE3SLOW** (SSE3 is supported, but usually not faster)
+*  **ATOM** (Atom processor, some SSSE3 instructions are slower)
+*  **Cache line** (Probable size of a cache line).
+*  **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
+
+## Cpu Vendor/VM
+* **Intel**
+* **AMD**
+* **VIA**
+* **Transmeta**
+* **NSC**
+* **KVM**  (Kernel-based Virtual Machine)
+* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
+* **VMware**
+* **XenHVM**
+
+# installing
+
+```go get github.com/klauspost/cpuid```
+
+# example
+
+```Go
+package main
+
+import (
+	"fmt"
+	"github.com/klauspost/cpuid"
+)
+
+func main() {
+	// Print basic CPU information:
+	fmt.Println("Name:", cpuid.CPU.BrandName)
+	fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
+	fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
+	fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
+	fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
+	fmt.Println("Features:", cpuid.CPU.Features)
+	fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
+	fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
+	fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
+	fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
+	fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
+
+	// Test if we have a specific feature:
+	if cpuid.CPU.SSE() {
+		fmt.Println("We have Streaming SIMD Extensions")
+	}
+}
+```
+
+Sample output:
+```
+>go run main.go
+Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
+PhysicalCores: 2
+ThreadsPerCore: 2
+LogicalCores: 4
+Family 6 Model: 42
+Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
+Cacheline bytes: 64
+We have Streaming SIMD Extensions
+```
+
+# private package
+
+In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
+
+For this purpose all exports are removed, and functions and constants are lowercased.
+
+This is not the recommended way of using the library, but it is provided for convenience if it is difficult for you to use external packages.
+
+# license
+
+This code is published under an MIT license. See LICENSE file for more information.

+ 1040 - 0
vendor/github.com/klauspost/cpuid/cpuid.go

@@ -0,0 +1,1040 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// Package cpuid provides information about the CPU running the current program.
+//
+// CPU features are detected on startup, and kept for fast access through the life of the application.
+// Currently x86 / x64 (AMD64) is supported.
+//
+// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+//
+// Package home: https://github.com/klauspost/cpuid
+package cpuid
+
+import "strings"
+
+// Vendor is a representation of a CPU vendor.
+type Vendor int
+
+const (
+	Other Vendor = iota
+	Intel
+	AMD
+	VIA
+	Transmeta
+	NSC
+	KVM  // Kernel-based Virtual Machine
+	MSVM // Microsoft Hyper-V or Windows Virtual PC
+	VMware
+	XenHVM
+)
+
+const (
+	CMOV        = 1 << iota // i686 CMOV
+	NX                      // NX (No-Execute) bit
+	AMD3DNOW                // AMD 3DNOW
+	AMD3DNOWEXT             // AMD 3DNowExt
+	MMX                     // standard MMX
+	MMXEXT                  // SSE integer functions or AMD MMX ext
+	SSE                     // SSE functions
+	SSE2                    // P4 SSE functions
+	SSE3                    // Prescott SSE3 functions
+	SSSE3                   // Conroe SSSE3 functions
+	SSE4                    // Penryn SSE4.1 functions
+	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
+	SSE42                   // Nehalem SSE4.2 functions
+	AVX                     // AVX functions
+	AVX2                    // AVX2 functions
+	FMA3                    // Intel FMA 3
+	FMA4                    // Bulldozer FMA4 functions
+	XOP                     // Bulldozer XOP functions
+	F16C                    // Half-precision floating-point conversion
+	BMI1                    // Bit Manipulation Instruction Set 1
+	BMI2                    // Bit Manipulation Instruction Set 2
+	TBM                     // AMD Trailing Bit Manipulation
+	LZCNT                   // LZCNT instruction
+	POPCNT                  // POPCNT instruction
+	AESNI                   // Advanced Encryption Standard New Instructions
+	CLMUL                   // Carry-less Multiplication
+	HTT                     // Hyperthreading (enabled)
+	HLE                     // Hardware Lock Elision
+	RTM                     // Restricted Transactional Memory
+	RDRAND                  // RDRAND instruction is available
+	RDSEED                  // RDSEED instruction is available
+	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	SHA                     // Intel SHA Extensions
+	AVX512F                 // AVX-512 Foundation
+	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
+	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF                // AVX-512 Prefetch Instructions
+	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
+	AVX512CD                // AVX-512 Conflict Detection Instructions
+	AVX512BW                // AVX-512 Byte and Word Instructions
+	AVX512VL                // AVX-512 Vector Length Extensions
+	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
+	MPX                     // Intel MPX (Memory Protection Extensions)
+	ERMS                    // Enhanced REP MOVSB/STOSB
+	RDTSCP                  // RDTSCP Instruction
+	CX16                    // CMPXCHG16B Instruction
+	SGX                     // Software Guard Extensions
+	IBPB                    // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	STIBP                   // Single Thread Indirect Branch Predictors
+
+	// Performance indicators
+	SSE2SLOW // SSE2 is supported, but usually not faster
+	SSE3SLOW // SSE3 is supported, but usually not faster
+	ATOM     // Atom processor, some SSSE3 instructions are slower
+)
+
+var flagNames = map[Flags]string{
+	CMOV:        "CMOV",        // i686 CMOV
+	NX:          "NX",          // NX (No-Execute) bit
+	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
+	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
+	MMX:         "MMX",         // Standard MMX
+	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
+	SSE:         "SSE",         // SSE functions
+	SSE2:        "SSE2",        // P4 SSE2 functions
+	SSE3:        "SSE3",        // Prescott SSE3 functions
+	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
+	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
+	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
+	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
+	AVX:         "AVX",         // AVX functions
+	AVX2:        "AVX2",        // AVX functions
+	FMA3:        "FMA3",        // Intel FMA 3
+	FMA4:        "FMA4",        // Bulldozer FMA4 functions
+	XOP:         "XOP",         // Bulldozer XOP functions
+	F16C:        "F16C",        // Half-precision floating-point conversion
+	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
+	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
+	TBM:         "TBM",         // AMD Trailing Bit Manipulation
+	LZCNT:       "LZCNT",       // LZCNT instruction
+	POPCNT:      "POPCNT",      // POPCNT instruction
+	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
+	CLMUL:       "CLMUL",       // Carry-less Multiplication
+	HTT:         "HTT",         // Hyperthreading (enabled)
+	HLE:         "HLE",         // Hardware Lock Elision
+	RTM:         "RTM",         // Restricted Transactional Memory
+	RDRAND:      "RDRAND",      // RDRAND instruction is available
+	RDSEED:      "RDSEED",      // RDSEED instruction is available
+	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	SHA:         "SHA",         // Intel SHA Extensions
+	AVX512F:     "AVX512F",     // AVX-512 Foundation
+	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
+	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
+	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
+	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
+	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
+	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
+	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
+	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
+	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
+	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
+	CX16:        "CX16",        // CMPXCHG16B Instruction
+	SGX:         "SGX",         // Software Guard Extensions
+	IBPB:        "IBPB",        // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
+	STIBP:       "STIBP",       // Single Thread Indirect Branch Predictors
+
+	// Performance indicators
+	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
+	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
+	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
+
+}
+
+// CPUInfo contains information about the detected system CPU.
+type CPUInfo struct {
+	BrandName      string // Brand name reported by the CPU
+	VendorID       Vendor // Comparable CPU vendor ID
+	Features       Flags  // Features of the CPU
+	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
+	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
+	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
+	Family         int    // CPU family number
+	Model          int    // CPU model number
+	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
+	Cache          struct {
+		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
+		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
+		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
+		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
+	}
+	SGX       SGXSupport
+	maxFunc   uint32
+	maxExFunc uint32
+}
+
+var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
+var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+var xgetbv func(index uint32) (eax, edx uint32)
+var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
+
+// CPU contains information about the CPU as detected on startup,
+// or when Detect last was called.
+//
+// Use this as the primary entry point to your data; this way queries are
+// answered from the values captured at the most recent detection.
+var CPU CPUInfo
+
+func init() {
+	initCPU()
+	Detect()
+}
+
+// Detect will re-detect current CPU info.
+// This will replace the content of the exported CPU variable.
+//
+// Unless you expect the CPU to change while you are running your program
+// you should not need to call this function.
+// If you call this, you must ensure that no other goroutine is accessing the
+// exported CPU variable.
+func Detect() {
+	CPU.maxFunc = maxFunctionID()
+	CPU.maxExFunc = maxExtendedFunction()
+	CPU.BrandName = brandName()
+	CPU.CacheLine = cacheLine()
+	CPU.Family, CPU.Model = familyModel()
+	CPU.Features = support()
+	CPU.SGX = hasSGX(CPU.Features&SGX != 0)
+	CPU.ThreadsPerCore = threadsPerCore()
+	CPU.LogicalCores = logicalCores()
+	CPU.PhysicalCores = physicalCores()
+	CPU.VendorID = vendorID()
+	CPU.cacheSize()
+}
+
+// Generated here: http://play.golang.org/p/BxFH2Gdc0G
+
+// Cmov indicates support of CMOV instructions
+func (c CPUInfo) Cmov() bool {
+	return c.Features&CMOV != 0
+}
+
+// Amd3dnow indicates support of AMD 3DNOW! instructions
+func (c CPUInfo) Amd3dnow() bool {
+	return c.Features&AMD3DNOW != 0
+}
+
+// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
+func (c CPUInfo) Amd3dnowExt() bool {
+	return c.Features&AMD3DNOWEXT != 0
+}
+
+// MMX indicates support of MMX instructions
+func (c CPUInfo) MMX() bool {
+	return c.Features&MMX != 0
+}
+
+// MMXExt indicates support of MMXEXT instructions
+// (SSE integer functions or AMD MMX ext)
+func (c CPUInfo) MMXExt() bool {
+	return c.Features&MMXEXT != 0
+}
+
+// SSE indicates support of SSE instructions
+func (c CPUInfo) SSE() bool {
+	return c.Features&SSE != 0
+}
+
+// SSE2 indicates support of SSE 2 instructions
+func (c CPUInfo) SSE2() bool {
+	return c.Features&SSE2 != 0
+}
+
+// SSE3 indicates support of SSE 3 instructions
+func (c CPUInfo) SSE3() bool {
+	return c.Features&SSE3 != 0
+}
+
+// SSSE3 indicates support of SSSE 3 instructions
+func (c CPUInfo) SSSE3() bool {
+	return c.Features&SSSE3 != 0
+}
+
+// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
+func (c CPUInfo) SSE4() bool {
+	return c.Features&SSE4 != 0
+}
+
+// SSE42 indicates support of SSE4.2 instructions
+func (c CPUInfo) SSE42() bool {
+	return c.Features&SSE42 != 0
+}
+
+// AVX indicates support of AVX instructions
+// and operating system support of AVX instructions
+func (c CPUInfo) AVX() bool {
+	return c.Features&AVX != 0
+}
+
+// AVX2 indicates support of AVX2 instructions
+func (c CPUInfo) AVX2() bool {
+	return c.Features&AVX2 != 0
+}
+
+// FMA3 indicates support of FMA3 instructions
+func (c CPUInfo) FMA3() bool {
+	return c.Features&FMA3 != 0
+}
+
+// FMA4 indicates support of FMA4 instructions
+func (c CPUInfo) FMA4() bool {
+	return c.Features&FMA4 != 0
+}
+
+// XOP indicates support of XOP instructions
+func (c CPUInfo) XOP() bool {
+	return c.Features&XOP != 0
+}
+
+// F16C indicates support of F16C instructions
+func (c CPUInfo) F16C() bool {
+	return c.Features&F16C != 0
+}
+
+// BMI1 indicates support of BMI1 instructions
+func (c CPUInfo) BMI1() bool {
+	return c.Features&BMI1 != 0
+}
+
+// BMI2 indicates support of BMI2 instructions
+func (c CPUInfo) BMI2() bool {
+	return c.Features&BMI2 != 0
+}
+
+// TBM indicates support of TBM instructions
+// (AMD Trailing Bit Manipulation)
+func (c CPUInfo) TBM() bool {
+	return c.Features&TBM != 0
+}
+
+// Lzcnt indicates support of LZCNT instruction
+func (c CPUInfo) Lzcnt() bool {
+	return c.Features&LZCNT != 0
+}
+
+// Popcnt indicates support of POPCNT instruction
+func (c CPUInfo) Popcnt() bool {
+	return c.Features&POPCNT != 0
+}
+
+// HTT indicates the processor has Hyperthreading enabled
+func (c CPUInfo) HTT() bool {
+	return c.Features&HTT != 0
+}
+
+// SSE2Slow indicates that SSE2 may be slow on this processor
+func (c CPUInfo) SSE2Slow() bool {
+	return c.Features&SSE2SLOW != 0
+}
+
+// SSE3Slow indicates that SSE3 may be slow on this processor
+func (c CPUInfo) SSE3Slow() bool {
+	return c.Features&SSE3SLOW != 0
+}
+
+// AesNi indicates support of AES-NI instructions
+// (Advanced Encryption Standard New Instructions)
+func (c CPUInfo) AesNi() bool {
+	return c.Features&AESNI != 0
+}
+
+// Clmul indicates support of CLMUL instructions
+// (Carry-less Multiplication)
+func (c CPUInfo) Clmul() bool {
+	return c.Features&CLMUL != 0
+}
+
+// NX indicates support of NX (No-Execute) bit
+func (c CPUInfo) NX() bool {
+	return c.Features&NX != 0
+}
+
+// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
+func (c CPUInfo) SSE4A() bool {
+	return c.Features&SSE4A != 0
+}
+
+// HLE indicates support of Hardware Lock Elision
+func (c CPUInfo) HLE() bool {
+	return c.Features&HLE != 0
+}
+
+// RTM indicates support of Restricted Transactional Memory
+func (c CPUInfo) RTM() bool {
+	return c.Features&RTM != 0
+}
+
+// Rdrand indicates the RDRAND instruction is available
+func (c CPUInfo) Rdrand() bool {
+	return c.Features&RDRAND != 0
+}
+
+// Rdseed indicates the RDSEED instruction is available
+func (c CPUInfo) Rdseed() bool {
+	return c.Features&RDSEED != 0
+}
+
+// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+func (c CPUInfo) ADX() bool {
+	return c.Features&ADX != 0
+}
+
+// SHA indicates support of Intel SHA Extensions
+func (c CPUInfo) SHA() bool {
+	return c.Features&SHA != 0
+}
+
+// AVX512F indicates support of AVX-512 Foundation
+func (c CPUInfo) AVX512F() bool {
+	return c.Features&AVX512F != 0
+}
+
+// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
+func (c CPUInfo) AVX512DQ() bool {
+	return c.Features&AVX512DQ != 0
+}
+
+// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
+func (c CPUInfo) AVX512IFMA() bool {
+	return c.Features&AVX512IFMA != 0
+}
+
+// AVX512PF indicates support of AVX-512 Prefetch Instructions
+func (c CPUInfo) AVX512PF() bool {
+	return c.Features&AVX512PF != 0
+}
+
+// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
+func (c CPUInfo) AVX512ER() bool {
+	return c.Features&AVX512ER != 0
+}
+
+// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
+func (c CPUInfo) AVX512CD() bool {
+	return c.Features&AVX512CD != 0
+}
+
+// AVX512BW indicates support of AVX-512 Byte and Word Instructions
+func (c CPUInfo) AVX512BW() bool {
+	return c.Features&AVX512BW != 0
+}
+
+// AVX512VL indicates support of AVX-512 Vector Length Extensions
+func (c CPUInfo) AVX512VL() bool {
+	return c.Features&AVX512VL != 0
+}
+
+// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
+func (c CPUInfo) AVX512VBMI() bool {
+	return c.Features&AVX512VBMI != 0
+}
+
+// MPX indicates support of Intel MPX (Memory Protection Extensions)
+func (c CPUInfo) MPX() bool {
+	return c.Features&MPX != 0
+}
+
+// ERMS indicates support of Enhanced REP MOVSB/STOSB
+func (c CPUInfo) ERMS() bool {
+	return c.Features&ERMS != 0
+}
+
+// RDTSCP indicates the RDTSCP instruction is available.
+func (c CPUInfo) RDTSCP() bool {
+	return c.Features&RDTSCP != 0
+}
+
+// CX16 indicates if CMPXCHG16B instruction is available.
+func (c CPUInfo) CX16() bool {
+	return c.Features&CX16 != 0
+}
+
+// TSX detection is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory).
+// TSX simply checks that both are present.
+func (c CPUInfo) TSX() bool {
+	return c.Features&(HLE|RTM) == HLE|RTM
+}
+
+// Atom indicates an Atom processor
+func (c CPUInfo) Atom() bool {
+	return c.Features&ATOM != 0
+}
+
+// Intel returns true if vendor is recognized as Intel
+func (c CPUInfo) Intel() bool {
+	return c.VendorID == Intel
+}
+
+// AMD returns true if vendor is recognized as AMD
+func (c CPUInfo) AMD() bool {
+	return c.VendorID == AMD
+}
+
+// Transmeta returns true if vendor is recognized as Transmeta
+func (c CPUInfo) Transmeta() bool {
+	return c.VendorID == Transmeta
+}
+
+// NSC returns true if vendor is recognized as National Semiconductor
+func (c CPUInfo) NSC() bool {
+	return c.VendorID == NSC
+}
+
+// VIA returns true if vendor is recognized as VIA
+func (c CPUInfo) VIA() bool {
+	return c.VendorID == VIA
+}
+
+// RTCounter returns the 64-bit time-stamp counter.
+// Uses the RDTSCP instruction. The value 0 is returned
+// if the CPU does not support the instruction.
+func (c CPUInfo) RTCounter() uint64 {
+	if !c.RDTSCP() {
+		return 0
+	}
+	a, _, _, d := rdtscpAsm()
+	return uint64(a) | (uint64(d) << 32)
+}
+
+// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
+// This variable is OS dependent, but on Linux contains information
+// about the current cpu/core the code is running on.
+// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
+func (c CPUInfo) Ia32TscAux() uint32 {
+	if !c.RDTSCP() {
+		return 0
+	}
+	_, _, ecx, _ := rdtscpAsm()
+	return ecx
+}
+
+// LogicalCPU will return the Logical CPU the code is currently executing on.
+// This is likely to change when the OS re-schedules the running thread
+// to another CPU.
+// If the current core cannot be detected, -1 will be returned.
+func (c CPUInfo) LogicalCPU() int {
+	if c.maxFunc < 1 {
+		return -1
+	}
+	_, ebx, _, _ := cpuid(1)
+	return int(ebx >> 24)
+}
+
+// VM will return true if the CPU ID indicates we are in
+// a virtual machine. This is only a hint, and will very likely
+// have many false negatives.
+func (c CPUInfo) VM() bool {
+	switch c.VendorID {
+	case MSVM, KVM, VMware, XenHVM:
+		return true
+	}
+	return false
+}
+
+// Flags contains detected CPU features and characteristics
+type Flags uint64
+
+// String returns a string representation of the detected
+// CPU features.
+func (f Flags) String() string {
+	return strings.Join(f.Strings(), ",")
+}
+
+// Strings returns an array of the detected features.
+func (f Flags) Strings() []string {
+	s := support()
+	r := make([]string, 0, 20)
+	for i := uint(0); i < 64; i++ {
+		key := Flags(1 << i)
+		val := flagNames[key]
+		if s&key != 0 {
+			r = append(r, val)
+		}
+	}
+	return r
+}
+
+func maxExtendedFunction() uint32 {
+	eax, _, _, _ := cpuid(0x80000000)
+	return eax
+}
+
+func maxFunctionID() uint32 {
+	a, _, _, _ := cpuid(0)
+	return a
+}
+
+func brandName() string {
+	if maxExtendedFunction() >= 0x80000004 {
+		v := make([]uint32, 0, 48)
+		for i := uint32(0); i < 3; i++ {
+			a, b, c, d := cpuid(0x80000002 + i)
+			v = append(v, a, b, c, d)
+		}
+		return strings.Trim(string(valAsString(v...)), " ")
+	}
+	return "unknown"
+}
+
+func threadsPerCore() int {
+	mfi := maxFunctionID()
+	if mfi < 0x4 || vendorID() != Intel {
+		return 1
+	}
+
+	if mfi < 0xb {
+		_, b, _, d := cpuid(1)
+		if (d & (1 << 28)) != 0 {
+			// v will contain logical core count
+			v := (b >> 16) & 255
+			if v > 1 {
+				a4, _, _, _ := cpuid(4)
+				// physical cores
+				v2 := (a4 >> 26) + 1
+				if v2 > 0 {
+					return int(v) / int(v2)
+				}
+			}
+		}
+		return 1
+	}
+	_, b, _, _ := cpuidex(0xb, 0)
+	if b&0xffff == 0 {
+		return 1
+	}
+	return int(b & 0xffff)
+}
+
+func logicalCores() int {
+	mfi := maxFunctionID()
+	switch vendorID() {
+	case Intel:
+		// Use this on old Intel processors
+		if mfi < 0xb {
+			if mfi < 1 {
+				return 0
+			}
+			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
+			// that can be assigned to logical processors in a physical package.
+			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
+			_, ebx, _, _ := cpuid(1)
+			logical := (ebx >> 16) & 0xff
+			return int(logical)
+		}
+		_, b, _, _ := cpuidex(0xb, 1)
+		return int(b & 0xffff)
+	case AMD:
+		_, b, _, _ := cpuid(1)
+		return int((b >> 16) & 0xff)
+	default:
+		return 0
+	}
+}
+
+func familyModel() (int, int) {
+	if maxFunctionID() < 0x1 {
+		return 0, 0
+	}
+	eax, _, _, _ := cpuid(1)
+	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
+	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
+	return int(family), int(model)
+}
+
+func physicalCores() int {
+	switch vendorID() {
+	case Intel:
+		return logicalCores() / threadsPerCore()
+	case AMD:
+		if maxExtendedFunction() >= 0x80000008 {
+			_, _, c, _ := cpuid(0x80000008)
+			return int(c&0xff) + 1
+		}
+	}
+	return 0
+}
+
+// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
+var vendorMapping = map[string]Vendor{
+	"AMDisbetter!": AMD,
+	"AuthenticAMD": AMD,
+	"CentaurHauls": VIA,
+	"GenuineIntel": Intel,
+	"TransmetaCPU": Transmeta,
+	"GenuineTMx86": Transmeta,
+	"Geode by NSC": NSC,
+	"VIA VIA VIA ": VIA,
+	"KVMKVMKVMKVM": KVM,
+	"Microsoft Hv": MSVM,
+	"VMwareVMware": VMware,
+	"XenVMMXenVMM": XenHVM,
+}
+
+func vendorID() Vendor {
+	_, b, c, d := cpuid(0)
+	v := valAsString(b, d, c)
+	vend, ok := vendorMapping[string(v)]
+	if !ok {
+		return Other
+	}
+	return vend
+}
+
+func cacheLine() int {
+	if maxFunctionID() < 0x1 {
+		return 0
+	}
+
+	_, ebx, _, _ := cpuid(1)
+	cache := (ebx & 0xff00) >> 5 // clflush size
+	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
+		_, _, ecx, _ := cpuid(0x80000006)
+		cache = ecx & 0xff // cacheline size
+	}
+	// TODO: Read from Cache and TLB Information
+	return int(cache)
+}
+
+func (c *CPUInfo) cacheSize() {
+	c.Cache.L1D = -1
+	c.Cache.L1I = -1
+	c.Cache.L2 = -1
+	c.Cache.L3 = -1
+	vendor := vendorID()
+	switch vendor {
+	case Intel:
+		if maxFunctionID() < 4 {
+			return
+		}
+		for i := uint32(0); ; i++ {
+			eax, ebx, ecx, _ := cpuidex(4, i)
+			cacheType := eax & 15
+			if cacheType == 0 {
+				break
+			}
+			cacheLevel := (eax >> 5) & 7
+			coherency := int(ebx&0xfff) + 1
+			partitions := int((ebx>>12)&0x3ff) + 1
+			associativity := int((ebx>>22)&0x3ff) + 1
+			sets := int(ecx) + 1
+			size := associativity * partitions * coherency * sets
+			switch cacheLevel {
+			case 1:
+				if cacheType == 1 {
+					// 1 = Data Cache
+					c.Cache.L1D = size
+				} else if cacheType == 2 {
+					// 2 = Instruction Cache
+					c.Cache.L1I = size
+				} else {
+					if c.Cache.L1D < 0 {
+						c.Cache.L1D = size
+					}
+					if c.Cache.L1I < 0 {
+						c.Cache.L1I = size
+					}
+				}
+			case 2:
+				c.Cache.L2 = size
+			case 3:
+				c.Cache.L3 = size
+			}
+		}
+	case AMD:
+		// Untested.
+		if maxExtendedFunction() < 0x80000005 {
+			return
+		}
+		_, _, ecx, edx := cpuid(0x80000005)
+		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
+		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
+
+		if maxExtendedFunction() < 0x80000006 {
+			return
+		}
+		_, _, ecx, _ = cpuid(0x80000006)
+		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
+	}
+
+	return
+}
+
+type SGXSupport struct {
+	Available           bool
+	SGX1Supported       bool
+	SGX2Supported       bool
+	MaxEnclaveSizeNot64 int64
+	MaxEnclaveSize64    int64
+}
+
+func hasSGX(available bool) (rval SGXSupport) {
+	rval.Available = available
+
+	if !available {
+		return
+	}
+
+	a, _, _, d := cpuidex(0x12, 0)
+	rval.SGX1Supported = a&0x01 != 0
+	rval.SGX2Supported = a&0x02 != 0
+	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
+	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
+
+	return
+}
+
+func support() Flags {
+	mfi := maxFunctionID()
+	vend := vendorID()
+	if mfi < 0x1 {
+		return 0
+	}
+	rval := uint64(0)
+	_, _, c, d := cpuid(1)
+	if (d & (1 << 15)) != 0 {
+		rval |= CMOV
+	}
+	if (d & (1 << 23)) != 0 {
+		rval |= MMX
+	}
+	if (d & (1 << 25)) != 0 {
+		rval |= MMXEXT
+	}
+	if (d & (1 << 25)) != 0 {
+		rval |= SSE
+	}
+	if (d & (1 << 26)) != 0 {
+		rval |= SSE2
+	}
+	if (c & 1) != 0 {
+		rval |= SSE3
+	}
+	if (c & 0x00000200) != 0 {
+		rval |= SSSE3
+	}
+	if (c & 0x00080000) != 0 {
+		rval |= SSE4
+	}
+	if (c & 0x00100000) != 0 {
+		rval |= SSE42
+	}
+	if (c & (1 << 25)) != 0 {
+		rval |= AESNI
+	}
+	if (c & (1 << 1)) != 0 {
+		rval |= CLMUL
+	}
+	if c&(1<<23) != 0 {
+		rval |= POPCNT
+	}
+	if c&(1<<30) != 0 {
+		rval |= RDRAND
+	}
+	if c&(1<<29) != 0 {
+		rval |= F16C
+	}
+	if c&(1<<13) != 0 {
+		rval |= CX16
+	}
+	if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
+		if threadsPerCore() > 1 {
+			rval |= HTT
+		}
+	}
+
+	// Check XGETBV, OSXSAVE and AVX bits
+	if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
+		// Check for OS support
+		eax, _ := xgetbv(0)
+		if (eax & 0x6) == 0x6 {
+			rval |= AVX
+			if (c & 0x00001000) != 0 {
+				rval |= FMA3
+			}
+		}
+	}
+
+	// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
+	if mfi >= 7 {
+		_, ebx, ecx, edx := cpuidex(7, 0)
+		if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
+			rval |= AVX2
+		}
+		if (ebx & 0x00000008) != 0 {
+			rval |= BMI1
+			if (ebx & 0x00000100) != 0 {
+				rval |= BMI2
+			}
+		}
+		if ebx&(1<<2) != 0 {
+			rval |= SGX
+		}
+		if ebx&(1<<4) != 0 {
+			rval |= HLE
+		}
+		if ebx&(1<<9) != 0 {
+			rval |= ERMS
+		}
+		if ebx&(1<<11) != 0 {
+			rval |= RTM
+		}
+		if ebx&(1<<14) != 0 {
+			rval |= MPX
+		}
+		if ebx&(1<<18) != 0 {
+			rval |= RDSEED
+		}
+		if ebx&(1<<19) != 0 {
+			rval |= ADX
+		}
+		if ebx&(1<<29) != 0 {
+			rval |= SHA
+		}
+		if edx&(1<<26) != 0 {
+			rval |= IBPB
+		}
+		if edx&(1<<27) != 0 {
+			rval |= STIBP
+		}
+
+		// Only detect AVX-512 features if XGETBV is supported
+		if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
+			// Check for OS support
+			eax, _ := xgetbv(0)
+
+			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+			// ZMM16-ZMM31 state are enabled by OS)
+			// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
+			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
+				if ebx&(1<<16) != 0 {
+					rval |= AVX512F
+				}
+				if ebx&(1<<17) != 0 {
+					rval |= AVX512DQ
+				}
+				if ebx&(1<<21) != 0 {
+					rval |= AVX512IFMA
+				}
+				if ebx&(1<<26) != 0 {
+					rval |= AVX512PF
+				}
+				if ebx&(1<<27) != 0 {
+					rval |= AVX512ER
+				}
+				if ebx&(1<<28) != 0 {
+					rval |= AVX512CD
+				}
+				if ebx&(1<<30) != 0 {
+					rval |= AVX512BW
+				}
+				if ebx&(1<<31) != 0 {
+					rval |= AVX512VL
+				}
+				// ecx
+				if ecx&(1<<1) != 0 {
+					rval |= AVX512VBMI
+				}
+			}
+		}
+	}
+
+	if maxExtendedFunction() >= 0x80000001 {
+		_, _, c, d := cpuid(0x80000001)
+		if (c & (1 << 5)) != 0 {
+			rval |= LZCNT
+			rval |= POPCNT
+		}
+		if (d & (1 << 31)) != 0 {
+			rval |= AMD3DNOW
+		}
+		if (d & (1 << 30)) != 0 {
+			rval |= AMD3DNOWEXT
+		}
+		if (d & (1 << 23)) != 0 {
+			rval |= MMX
+		}
+		if (d & (1 << 22)) != 0 {
+			rval |= MMXEXT
+		}
+		if (c & (1 << 6)) != 0 {
+			rval |= SSE4A
+		}
+		if d&(1<<20) != 0 {
+			rval |= NX
+		}
+		if d&(1<<27) != 0 {
+			rval |= RDTSCP
+		}
+
+		/* Allow for selectively disabling SSE2 functions on AMD processors
+		   with SSE2 support but not SSE4a. This includes Athlon64, some
+		   Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
+		   than SSE2 often enough to utilize this special-case flag.
+		   AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
+		   so that SSE2 is used unless explicitly disabled by checking
+		   AV_CPU_FLAG_SSE2SLOW. */
+		if vendorID() != Intel &&
+			rval&SSE2 != 0 && (c&0x00000040) == 0 {
+			rval |= SSE2SLOW
+		}
+
+		/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
+		 * used unless the OS has AVX support. */
+		if (rval & AVX) != 0 {
+			if (c & 0x00000800) != 0 {
+				rval |= XOP
+			}
+			if (c & 0x00010000) != 0 {
+				rval |= FMA4
+			}
+		}
+
+		if vendorID() == Intel {
+			family, model := familyModel()
+			if family == 6 && (model == 9 || model == 13 || model == 14) {
+				/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
+				 * 6/14 (core1 "yonah") theoretically support sse2, but it's
+				 * usually slower than mmx. */
+				if (rval & SSE2) != 0 {
+					rval |= SSE2SLOW
+				}
+				if (rval & SSE3) != 0 {
+					rval |= SSE3SLOW
+				}
+			}
+			/* The Atom processor has SSSE3 support, which is useful in many cases,
+			 * but sometimes the SSSE3 version is slower than the SSE2 equivalent
+			 * on the Atom, but is generally faster on other processors supporting
+			 * SSSE3. This flag allows for selectively disabling certain SSSE3
+			 * functions on the Atom. */
+			if family == 6 && model == 28 {
+				rval |= ATOM
+			}
+		}
+	}
+	return Flags(rval)
+}
+
+func valAsString(values ...uint32) []byte {
+	r := make([]byte, 4*len(values))
+	for i, v := range values {
+		dst := r[i*4:]
+		dst[0] = byte(v & 0xff)
+		dst[1] = byte((v >> 8) & 0xff)
+		dst[2] = byte((v >> 16) & 0xff)
+		dst[3] = byte((v >> 24) & 0xff)
+		switch {
+		case dst[0] == 0:
+			return r[:i*4]
+		case dst[1] == 0:
+			return r[:i*4+1]
+		case dst[2] == 0:
+			return r[:i*4+2]
+		case dst[3] == 0:
+			return r[:i*4+3]
+		}
+	}
+	return r
+}
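+
+// exampleSIMDSelection is an illustrative sketch, not part of the upstream
+// cpuid package: it shows how a caller (for instance the reedsolomon package
+// vendored alongside this one) might gate SIMD code paths on the detected
+// feature flags. It assumes the package-level CPU value has already been
+// populated by the package's detection during init.
+func exampleSIMDSelection() string {
+	switch {
+	case CPU.AVX2():
+		return "avx2"
+	case CPU.SSSE3():
+		return "ssse3"
+	default:
+		return "generic"
+	}
+}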

+ 42 - 0
vendor/github.com/klauspost/cpuid/cpuid_386.s

@@ -0,0 +1,42 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// +build 386,!gccgo
+
+// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuid(SB), 7, $0
+	XORL CX, CX
+	MOVL op+0(FP), AX
+	CPUID
+	MOVL AX, eax+4(FP)
+	MOVL BX, ebx+8(FP)
+	MOVL CX, ecx+12(FP)
+	MOVL DX, edx+16(FP)
+	RET
+
+// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuidex(SB), 7, $0
+	MOVL op+0(FP), AX
+	MOVL op2+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func asmXgetbv(index uint32) (eax, edx uint32)
+TEXT ·asmXgetbv(SB), 7, $0
+	MOVL index+0(FP), CX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
+	MOVL AX, eax+4(FP)
+	MOVL DX, edx+8(FP)
+	RET
+
+// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+TEXT ·asmRdtscpAsm(SB), 7, $0
+	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
+	MOVL AX, eax+0(FP)
+	MOVL BX, ebx+4(FP)
+	MOVL CX, ecx+8(FP)
+	MOVL DX, edx+12(FP)
+	RET

+ 42 - 0
vendor/github.com/klauspost/cpuid/cpuid_amd64.s

@@ -0,0 +1,42 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build amd64,!gccgo
+
+// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuid(SB), 7, $0
+	XORQ CX, CX
+	MOVL op+0(FP), AX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuidex(SB), 7, $0
+	MOVL op+0(FP), AX
+	MOVL op2+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func asmXgetbv(index uint32) (eax, edx uint32)
+TEXT ·asmXgetbv(SB), 7, $0
+	MOVL index+0(FP), CX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
+	MOVL AX, eax+8(FP)
+	MOVL DX, edx+12(FP)
+	RET
+
+// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+TEXT ·asmRdtscpAsm(SB), 7, $0
+	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
+	MOVL AX, eax+0(FP)
+	MOVL BX, ebx+4(FP)
+	MOVL CX, ecx+8(FP)
+	MOVL DX, edx+12(FP)
+	RET

+ 17 - 0
vendor/github.com/klauspost/cpuid/detect_intel.go

@@ -0,0 +1,17 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// +build 386,!gccgo amd64,!gccgo
+
+package cpuid
+
+func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+func asmXgetbv(index uint32) (eax, edx uint32)
+func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+
+func initCPU() {
+	cpuid = asmCpuid
+	cpuidex = asmCpuidex
+	xgetbv = asmXgetbv
+	rdtscpAsm = asmRdtscpAsm
+}

+ 23 - 0
vendor/github.com/klauspost/cpuid/detect_ref.go

@@ -0,0 +1,23 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// +build !amd64,!386 gccgo
+
+package cpuid
+
+func initCPU() {
+	cpuid = func(op uint32) (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+
+	cpuidex = func(op, op2 uint32) (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+
+	xgetbv = func(index uint32) (eax, edx uint32) {
+		return 0, 0
+	}
+
+	rdtscpAsm = func() (eax, ebx, ecx, edx uint32) {
+		return 0, 0, 0, 0
+	}
+}

+ 4 - 0
vendor/github.com/klauspost/cpuid/generate.go

@@ -0,0 +1,4 @@
+package cpuid
+
+//go:generate go run private-gen.go
+//go:generate gofmt -w ./private

+ 476 - 0
vendor/github.com/klauspost/cpuid/private-gen.go

@@ -0,0 +1,476 @@
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"go/ast"
+	"go/parser"
+	"go/printer"
+	"go/token"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"reflect"
+	"strings"
+	"unicode"
+	"unicode/utf8"
+)
+
+var inFiles = []string{"cpuid.go", "cpuid_test.go"}
+var copyFiles = []string{"cpuid_amd64.s", "cpuid_386.s", "detect_ref.go", "detect_intel.go"}
+var fileSet = token.NewFileSet()
+var reWrites = []rewrite{
+	initRewrite("CPUInfo -> cpuInfo"),
+	initRewrite("Vendor -> vendor"),
+	initRewrite("Flags -> flags"),
+	initRewrite("Detect -> detect"),
+	initRewrite("CPU -> cpu"),
+}
+var excludeNames = map[string]bool{"string": true, "join": true, "trim": true,
+	// cpuid_test.go
+	"t": true, "println": true, "logf": true, "log": true, "fatalf": true, "fatal": true,
+}
+
+var excludePrefixes = []string{"test", "benchmark"}
+
+func main() {
+	Package := "private"
+	parserMode := parser.ParseComments
+	exported := make(map[string]rewrite)
+	for _, file := range inFiles {
+		in, err := os.Open(file)
+		if err != nil {
+			log.Fatalf("opening input: %s", err)
+		}
+
+		src, err := ioutil.ReadAll(in)
+		if err != nil {
+			log.Fatalf("reading input: %s", err)
+		}
+
+		astfile, err := parser.ParseFile(fileSet, file, src, parserMode)
+		if err != nil {
+			log.Fatalf("parsing input: %s", err)
+		}
+
+		for _, rw := range reWrites {
+			astfile = rw(astfile)
+		}
+
+		// Inspect the AST and print all identifiers and literals.
+		var startDecl token.Pos
+		var endDecl token.Pos
+		ast.Inspect(astfile, func(n ast.Node) bool {
+			var s string
+			switch x := n.(type) {
+			case *ast.Ident:
+				if x.IsExported() {
+					t := strings.ToLower(x.Name)
+					for _, pre := range excludePrefixes {
+						if strings.HasPrefix(t, pre) {
+							return true
+						}
+					}
+					if !excludeNames[t] {
+						//if x.Pos() > startDecl && x.Pos() < endDecl {
+						exported[x.Name] = initRewrite(x.Name + " -> " + t)
+					}
+				}
+
+			case *ast.GenDecl:
+				if x.Tok == token.CONST && x.Lparen > 0 {
+					startDecl = x.Lparen
+					endDecl = x.Rparen
+					// fmt.Printf("Decl:%s -> %s\n", fileSet.Position(startDecl), fileSet.Position(endDecl))
+				}
+			}
+			if s != "" {
+				fmt.Printf("%s:\t%s\n", fileSet.Position(n.Pos()), s)
+			}
+			return true
+		})
+
+		for _, rw := range exported {
+			astfile = rw(astfile)
+		}
+
+		var buf bytes.Buffer
+
+		printer.Fprint(&buf, fileSet, astfile)
+
+		// Remove package documentation and insert information
+		s := buf.String()
+		ind := strings.Index(buf.String(), "\npackage cpuid")
+		s = s[ind:]
+		s = "// Generated, DO NOT EDIT,\n" +
+			"// but copy it to your own project and rename the package.\n" +
+			"// See more at http://github.com/klauspost/cpuid\n" +
+			s
+
+		outputName := Package + string(os.PathSeparator) + file
+
+		err = ioutil.WriteFile(outputName, []byte(s), 0644)
+		if err != nil {
+			log.Fatalf("writing output: %s", err)
+		}
+		log.Println("Generated", outputName)
+	}
+
+	for _, file := range copyFiles {
+		dst := ""
+		if strings.HasPrefix(file, "cpuid") {
+			dst = Package + string(os.PathSeparator) + file
+		} else {
+			dst = Package + string(os.PathSeparator) + "cpuid_" + file
+		}
+		err := copyFile(file, dst)
+		if err != nil {
+			log.Fatalf("copying file: %s", err)
+		}
+		log.Println("Copied", dst)
+	}
+}
+
+// CopyFile copies a file from src to dst. If src and dst files exist, and are
+// the same, then return success. Copy the file contents from src to dst.
+func copyFile(src, dst string) (err error) {
+	sfi, err := os.Stat(src)
+	if err != nil {
+		return
+	}
+	if !sfi.Mode().IsRegular() {
+		// cannot copy non-regular files (e.g., directories,
+		// symlinks, devices, etc.)
+		return fmt.Errorf("CopyFile: non-regular source file %s (%q)", sfi.Name(), sfi.Mode().String())
+	}
+	dfi, err := os.Stat(dst)
+	if err != nil {
+		if !os.IsNotExist(err) {
+			return
+		}
+	} else {
+		if !(dfi.Mode().IsRegular()) {
+			return fmt.Errorf("CopyFile: non-regular destination file %s (%q)", dfi.Name(), dfi.Mode().String())
+		}
+		if os.SameFile(sfi, dfi) {
+			return
+		}
+	}
+	err = copyFileContents(src, dst)
+	return
+}
+
+// copyFileContents copies the contents of the file named src to the file named
+// by dst. The file will be created if it does not already exist. If the
+// destination file exists, all its contents will be replaced by the contents
+// of the source file.
+func copyFileContents(src, dst string) (err error) {
+	in, err := os.Open(src)
+	if err != nil {
+		return
+	}
+	defer in.Close()
+	out, err := os.Create(dst)
+	if err != nil {
+		return
+	}
+	defer func() {
+		cerr := out.Close()
+		if err == nil {
+			err = cerr
+		}
+	}()
+	if _, err = io.Copy(out, in); err != nil {
+		return
+	}
+	err = out.Sync()
+	return
+}
+
+type rewrite func(*ast.File) *ast.File
+
+// Mostly copied from gofmt
+func initRewrite(rewriteRule string) rewrite {
+	f := strings.Split(rewriteRule, "->")
+	if len(f) != 2 {
+		fmt.Fprintf(os.Stderr, "rewrite rule must be of the form 'pattern -> replacement'\n")
+		os.Exit(2)
+	}
+	pattern := parseExpr(f[0], "pattern")
+	replace := parseExpr(f[1], "replacement")
+	return func(p *ast.File) *ast.File { return rewriteFile(pattern, replace, p) }
+}
+
+// parseExpr parses s as an expression.
+// It might make sense to expand this to allow statement patterns,
+// but there are problems with preserving formatting and also
+// with what a wildcard for a statement looks like.
+func parseExpr(s, what string) ast.Expr {
+	x, err := parser.ParseExpr(s)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "parsing %s %s at %s\n", what, s, err)
+		os.Exit(2)
+	}
+	return x
+}
+
+// Keep this function for debugging.
+/*
+func dump(msg string, val reflect.Value) {
+	fmt.Printf("%s:\n", msg)
+	ast.Print(fileSet, val.Interface())
+	fmt.Println()
+}
+*/
+
+// rewriteFile applies the rewrite rule 'pattern -> replace' to an entire file.
+func rewriteFile(pattern, replace ast.Expr, p *ast.File) *ast.File {
+	cmap := ast.NewCommentMap(fileSet, p, p.Comments)
+	m := make(map[string]reflect.Value)
+	pat := reflect.ValueOf(pattern)
+	repl := reflect.ValueOf(replace)
+
+	var rewriteVal func(val reflect.Value) reflect.Value
+	rewriteVal = func(val reflect.Value) reflect.Value {
+		// don't bother if val is invalid to start with
+		if !val.IsValid() {
+			return reflect.Value{}
+		}
+		for k := range m {
+			delete(m, k)
+		}
+		val = apply(rewriteVal, val)
+		if match(m, pat, val) {
+			val = subst(m, repl, reflect.ValueOf(val.Interface().(ast.Node).Pos()))
+		}
+		return val
+	}
+
+	r := apply(rewriteVal, reflect.ValueOf(p)).Interface().(*ast.File)
+	r.Comments = cmap.Filter(r).Comments() // recreate comments list
+	return r
+}
+
+// set is a wrapper for x.Set(y); it protects the caller from panics if x cannot be changed to y.
+func set(x, y reflect.Value) {
+	// don't bother if x cannot be set or y is invalid
+	if !x.CanSet() || !y.IsValid() {
+		return
+	}
+	defer func() {
+		if x := recover(); x != nil {
+			if s, ok := x.(string); ok &&
+				(strings.Contains(s, "type mismatch") || strings.Contains(s, "not assignable")) {
+				// x cannot be set to y - ignore this rewrite
+				return
+			}
+			panic(x)
+		}
+	}()
+	x.Set(y)
+}
+
+// Values/types for special cases.
+var (
+	objectPtrNil = reflect.ValueOf((*ast.Object)(nil))
+	scopePtrNil  = reflect.ValueOf((*ast.Scope)(nil))
+
+	identType     = reflect.TypeOf((*ast.Ident)(nil))
+	objectPtrType = reflect.TypeOf((*ast.Object)(nil))
+	positionType  = reflect.TypeOf(token.NoPos)
+	callExprType  = reflect.TypeOf((*ast.CallExpr)(nil))
+	scopePtrType  = reflect.TypeOf((*ast.Scope)(nil))
+)
+
+// apply replaces each AST field x in val with f(x), returning val.
+// To avoid extra conversions, f operates on the reflect.Value form.
+func apply(f func(reflect.Value) reflect.Value, val reflect.Value) reflect.Value {
+	if !val.IsValid() {
+		return reflect.Value{}
+	}
+
+	// *ast.Objects introduce cycles and are likely incorrect after
+	// rewrite; don't follow them but replace with nil instead
+	if val.Type() == objectPtrType {
+		return objectPtrNil
+	}
+
+	// similarly for scopes: they are likely incorrect after a rewrite;
+	// replace them with nil
+	if val.Type() == scopePtrType {
+		return scopePtrNil
+	}
+
+	switch v := reflect.Indirect(val); v.Kind() {
+	case reflect.Slice:
+		for i := 0; i < v.Len(); i++ {
+			e := v.Index(i)
+			set(e, f(e))
+		}
+	case reflect.Struct:
+		for i := 0; i < v.NumField(); i++ {
+			e := v.Field(i)
+			set(e, f(e))
+		}
+	case reflect.Interface:
+		e := v.Elem()
+		set(v, f(e))
+	}
+	return val
+}
+
+func isWildcard(s string) bool {
+	rune, size := utf8.DecodeRuneInString(s)
+	return size == len(s) && unicode.IsLower(rune)
+}
+
+// match returns true if pattern matches val,
+// recording wildcard submatches in m.
+// If m == nil, match checks whether pattern == val.
+func match(m map[string]reflect.Value, pattern, val reflect.Value) bool {
+	// Wildcard matches any expression.  If it appears multiple
+	// times in the pattern, it must match the same expression
+	// each time.
+	if m != nil && pattern.IsValid() && pattern.Type() == identType {
+		name := pattern.Interface().(*ast.Ident).Name
+		if isWildcard(name) && val.IsValid() {
+			// wildcards only match valid (non-nil) expressions.
+			if _, ok := val.Interface().(ast.Expr); ok && !val.IsNil() {
+				if old, ok := m[name]; ok {
+					return match(nil, old, val)
+				}
+				m[name] = val
+				return true
+			}
+		}
+	}
+
+	// Otherwise, pattern and val must match recursively.
+	if !pattern.IsValid() || !val.IsValid() {
+		return !pattern.IsValid() && !val.IsValid()
+	}
+	if pattern.Type() != val.Type() {
+		return false
+	}
+
+	// Special cases.
+	switch pattern.Type() {
+	case identType:
+		// For identifiers, only the names need to match
+		// (and none of the other *ast.Object information).
+		// This is a common case, handle it all here instead
+		// of recursing down any further via reflection.
+		p := pattern.Interface().(*ast.Ident)
+		v := val.Interface().(*ast.Ident)
+		return p == nil && v == nil || p != nil && v != nil && p.Name == v.Name
+	case objectPtrType, positionType:
+		// object pointers and token positions always match
+		return true
+	case callExprType:
+		// For calls, the Ellipsis fields (token.Position) must
+		// match since that is how f(x) and f(x...) are different.
+		// Check them here but fall through for the remaining fields.
+		p := pattern.Interface().(*ast.CallExpr)
+		v := val.Interface().(*ast.CallExpr)
+		if p.Ellipsis.IsValid() != v.Ellipsis.IsValid() {
+			return false
+		}
+	}
+
+	p := reflect.Indirect(pattern)
+	v := reflect.Indirect(val)
+	if !p.IsValid() || !v.IsValid() {
+		return !p.IsValid() && !v.IsValid()
+	}
+
+	switch p.Kind() {
+	case reflect.Slice:
+		if p.Len() != v.Len() {
+			return false
+		}
+		for i := 0; i < p.Len(); i++ {
+			if !match(m, p.Index(i), v.Index(i)) {
+				return false
+			}
+		}
+		return true
+
+	case reflect.Struct:
+		for i := 0; i < p.NumField(); i++ {
+			if !match(m, p.Field(i), v.Field(i)) {
+				return false
+			}
+		}
+		return true
+
+	case reflect.Interface:
+		return match(m, p.Elem(), v.Elem())
+	}
+
+	// Handle token integers, etc.
+	return p.Interface() == v.Interface()
+}
+
+// subst returns a copy of pattern with values from m substituted in place
+// of wildcards and pos used as the position of tokens from the pattern.
+// if m == nil, subst returns a copy of pattern and doesn't change the line
+// number information.
+func subst(m map[string]reflect.Value, pattern reflect.Value, pos reflect.Value) reflect.Value {
+	if !pattern.IsValid() {
+		return reflect.Value{}
+	}
+
+	// Wildcard gets replaced with map value.
+	if m != nil && pattern.Type() == identType {
+		name := pattern.Interface().(*ast.Ident).Name
+		if isWildcard(name) {
+			if old, ok := m[name]; ok {
+				return subst(nil, old, reflect.Value{})
+			}
+		}
+	}
+
+	if pos.IsValid() && pattern.Type() == positionType {
+		// use new position only if old position was valid in the first place
+		if old := pattern.Interface().(token.Pos); !old.IsValid() {
+			return pattern
+		}
+		return pos
+	}
+
+	// Otherwise copy.
+	switch p := pattern; p.Kind() {
+	case reflect.Slice:
+		v := reflect.MakeSlice(p.Type(), p.Len(), p.Len())
+		for i := 0; i < p.Len(); i++ {
+			v.Index(i).Set(subst(m, p.Index(i), pos))
+		}
+		return v
+
+	case reflect.Struct:
+		v := reflect.New(p.Type()).Elem()
+		for i := 0; i < p.NumField(); i++ {
+			v.Field(i).Set(subst(m, p.Field(i), pos))
+		}
+		return v
+
+	case reflect.Ptr:
+		v := reflect.New(p.Type()).Elem()
+		if elem := p.Elem(); elem.IsValid() {
+			v.Set(subst(m, elem, pos).Addr())
+		}
+		return v
+
+	case reflect.Interface:
+		v := reflect.New(p.Type()).Elem()
+		if elem := p.Elem(); elem.IsValid() {
+			v.Set(subst(m, elem, pos))
+		}
+		return v
+	}
+
+	return pattern
+}

+ 26 - 0
vendor/github.com/klauspost/reedsolomon/.gitignore

@@ -0,0 +1,26 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
+
+.idea

+ 33 - 0
vendor/github.com/klauspost/reedsolomon/.travis.yml

@@ -0,0 +1,33 @@
+language: go
+
+sudo: false
+
+os:
+  - linux
+  - osx 
+
+go:
+  - 1.9.x
+  - 1.10.x
+  - 1.11.x
+  - 1.12.x
+  - master
+
+install:
+ - go get ./...
+
+script: 
+ - go vet ./...
+ - go test -v -cpu=1,2,4 .
+ - go test -v -cpu=1,2,4 -short -race .
+ - go test -tags=noasm -v -cpu=1,2,4 -short -race .
+ - go build examples/simple-decoder.go
+ - go build examples/simple-encoder.go
+ - go build examples/stream-decoder.go
+ - go build examples/stream-encoder.go
+ - diff <(gofmt -d .) <("")
+
+matrix:
+  allow_failures:
+    - go: 'master'
+  fast_finish: true

+ 2 - 2
vendor/github.com/templexxx/reedsolomon/LICENSE → vendor/github.com/klauspost/reedsolomon/LICENSE

@@ -1,6 +1,5 @@
-MIT License
+The MIT License (MIT)
 
-Copyright (c) 2017 Templexxx
 Copyright (c) 2015 Klaus Post
 Copyright (c) 2015 Backblaze
 
@@ -21,3 +20,4 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+

+ 321 - 0
vendor/github.com/klauspost/reedsolomon/README.md

@@ -0,0 +1,321 @@
+# Reed-Solomon
+[![GoDoc][1]][2] [![Build Status][3]][4]
+
+[1]: https://godoc.org/github.com/klauspost/reedsolomon?status.svg
+[2]: https://godoc.org/github.com/klauspost/reedsolomon
+[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master
+[4]: https://travis-ci.org/klauspost/reedsolomon
+
+Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
+
+This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by [Backblaze](http://backblaze.com), with some additional optimizations.
+
+For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
+
+Package home: https://github.com/klauspost/reedsolomon
+
+Godoc: https://godoc.org/github.com/klauspost/reedsolomon
+
+# Installation
+To get the package use the standard:
+```bash
+go get -u github.com/klauspost/reedsolomon
+```
+
+# Changes
+
+## March 6, 2019
+
+The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
+
+## February 8, 2019
+
+AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2. See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
+
+## December 18, 2018
+
+Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
+
+## November 18, 2017
+
+Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
+
+## October 1, 2017
+
+* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option. Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
+* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines) has been increased for better multi-core scaling.
+* After several requests, Reconstruct and ReconstructData now accept slices of zero length but sufficient capacity, which will be used instead of allocating new memory.
+
+## August 26, 2017
+
+*  The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update` function contributed by [chenzhongtao](https://github.com/chenzhongtao).
+* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly, which gives a huge performance boost on this platform.
+
+## July 20, 2017
+
+`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface. This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
+```Go
+func (e *YourEnc) ReconstructData(shards [][]byte) error {
+	return ReconstructData(shards)
+}
+```
+
+You can of course also do your own implementation. The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder) handles this without modifying the interface. This is a good lesson on why returning interfaces is not a good design.
+
+# Usage
+
+This section assumes you know the basics of Reed-Solomon encoding. A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
+
+This package performs the calculation of the parity sets. The usage is therefore relatively simple.
+
+First of all, you need to choose your distribution of data and parity shards. A 'good' distribution is very subjective, and will depend a lot on your usage scenario. A good starting point is above 5 and below 257 data shards (the maximum supported number), and the number of parity shards to be 2 or above, and below the number of data shards.
+
+To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
+```Go
+    enc, err := reedsolomon.New(10, 3)
+```
+This encoder will work for all parity sets with this distribution of data and parity shards. The error will only be set if you specify 0 or negative values in any of the parameters, or if you specify more than 256 data shards.
+
+The data you send and receive is a simple slice of byte slices: `[][]byte`. In the example above, the top slice must have a length of 13.
+```Go
+    data := make([][]byte, 13)
+```
+You should then fill the 10 first slices with *equally sized* data, and create parity shards that will be populated with parity data. In this case we create the data in memory, but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
+
+```Go
+    // Create all shards, size them at 50000 each
+    for i := range data {
+      data[i] = make([]byte, 50000)
+    }
+
+    // Fill some data into the data shards
+    for i, in := range data[:10] {
+      for j:= range in {
+         in[j] = byte((i+j)&0xff)
+      }
+    }
+```
+
+To populate the parity shards, you simply call `Encode()` with your data.
+```Go
+    err = enc.Encode(data)
+```
+The only case where you should get an error is if the data shards aren't of equal size. The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
+
+```Go
+    ok, err = enc.Verify(data)
+```
+
+The final (and important) part is to be able to reconstruct missing shards. For this to work, you need to know which parts of your data are missing. The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario, you need to implement a hash check for each shard. If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
+
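+As a minimal sketch of such a hash check (this helper is not part of the package; it assumes `crypto/sha256` is imported and that the digests are stored somewhere safe), you could record a digest per shard right after encoding and nil out any shard that no longer matches before reconstructing:
+
+```Go
+    // Record a digest per shard right after encoding.
+    shardHashes := make([][32]byte, len(data))
+    for i, shard := range data {
+        shardHashes[i] = sha256.Sum256(shard)
+    }
+
+    // Later, before reconstructing, drop any shard that no longer matches.
+    for i, shard := range data {
+        if shard != nil && sha256.Sum256(shard) != shardHashes[i] {
+            data[i] = nil
+        }
+    }
+```
+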
+To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
+
+```Go
+    // Delete two data shards
+    data[3] = nil
+    data[7] = nil
+    
+    // Reconstruct the missing shards
+    err := enc.Reconstruct(data)
+```
+The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
+
+If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
+
+```Go
+    // Delete two data shards
+    data[3] = nil
+    data[7] = nil
+    
+    // Reconstruct just the missing data shards
+    err := enc.ReconstructData(data)
+```
+
+So to sum up reconstruction:
+* The number of data/parity shards must match the numbers used for encoding.
+* The order of shards must be the same as used when encoding.
+* You may only supply data you know is valid.
+* Invalid shards should be set to nil.
+
+For complete examples of an encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+
+# Splitting/Joining Data
+
+You might have a large slice of data. To help you split this, there are some helper functions that can split and join a single byte slice.
+
+```Go
+   bigfile, _ := ioutil.ReadFile("myfile.data")
+   
+   // Split the file
+   split, err := enc.Split(bigfile)
+```
+This will split the file into the number of data shards set when creating the encoder and create empty parity shards. 
+
+An important thing to note is that you have to *keep track of the exact input size*. If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
+
+To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply: 
+```Go
+   // Join a data set and write it to io.Discard.
+   err = enc.Join(io.Discard, data, len(bigfile))
+```
+
+# Streaming/Merging
+
+It might seem like a limitation that all data should be in memory, but an important property is that *as long as the number of data/parity shards is the same, you can merge/split data sets*, and they will remain valid as a separate set.
+
+```Go
+    // Split the data set of 50000 elements into two of 25000
+    splitA := make([][]byte, 13)
+    splitB := make([][]byte, 13)
+    
+    // Merge into a 100000 element set
+    merged := make([][]byte, 13)
+    
+    for i := range data {
+      splitA[i] = data[i][:25000]
+      splitB[i] = data[i][25000:]
+      
+      // Concatenate it to itself
+	  merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
+	  merged[i] = append(merged[i], data[i]...)
+    }
+    
+    // Each part should still verify as ok.
+    ok, err := enc.Verify(splitA)
+    if ok && err == nil {
+        log.Println("splitA ok")
+    }
+    
+    ok, err = enc.Verify(splitB)
+    if ok && err == nil {
+        log.Println("splitB ok")
+    }
+    
+    ok, err = enc.Verify(merged)
+    if ok && err == nil {
+        log.Println("merge ok")
+    }
+```
+
+This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks. For the best throughput, don't use too small blocks.
+
+This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data. This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
+
+# Streaming API
+
+Support for a streaming API has been added, which lets you perform the same operations on streams. To use the stream API, use the [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function to create the encoding/decoding interfaces. You can use [`NewStreamC`](https://godoc.org/github.com/klauspost/reedsolomon#NewStreamC) to create an interface that reads/writes concurrently from the streams.
+
+Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API. Each stream must supply the same amount of data, similar to how each slice must be the same size with the in-memory API.
+If an error occurs in relation to a stream, a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError) or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError) will help you determine which stream was the offender.
+
+There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
+
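+As a rough sketch of the streaming flow (assuming the usual `bytes`, `io` and `log` imports; the in-memory buffers below stand in for whatever files or network streams you actually use):
+
+```Go
+    enc, err := reedsolomon.NewStream(10, 3)
+    if err != nil {
+        log.Fatal(err)
+    }
+
+    // Ten equally sized data shards as readers (in-memory for brevity).
+    data := make([]io.Reader, 10)
+    for i := range data {
+        data[i] = bytes.NewReader(make([]byte, 50000))
+    }
+
+    // Three parity shards as writers.
+    parityBufs := make([]*bytes.Buffer, 3)
+    parity := make([]io.Writer, 3)
+    for i := range parity {
+        parityBufs[i] = new(bytes.Buffer)
+        parity[i] = parityBufs[i]
+    }
+
+    // Stream the data shards once and write the computed parity shards.
+    if err := enc.Encode(data, parity); err != nil {
+        log.Fatal(err)
+    }
+```
+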
+For complete examples of a streaming encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+
+# Advanced Options
+
+You can modify internal options, which affect how jobs are split between and processed by goroutines.
+
+To create options, use the WithXXX functions. You can supply options to `New`, `NewStream` and `NewStreamC`. If no Options are supplied, default options are used.
+
+Example of how to supply options:
+
+ ```Go
+     enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
+ ```
+
+
+# Performance
+Performance depends mainly on the number of parity shards. In rough terms, doubling the number of parity shards will double the encoding time.
+
+Here are the throughput numbers with some different selections of data and parity shards. For reference, each shard is 1MB of random data, and 2 CPU cores are used for encoding.
+
+| Data | Parity | Parity % | MB/s   | SSSE3 MB/s  | SSSE3 Speed | Rel. Speed |
+|------|--------|--------|--------|-------------|-------------|------------|
+| 5    | 2      | 40%    | 576,11 | 2599,2      | 451%        | 100,00%    |
+| 10   | 2      | 20%    | 587,73 | 3100,28     | 528%        | 102,02%    |
+| 10   | 4      | 40%    | 298,38 | 2470,97     | 828%        | 51,79%     |
+| 50   | 20     | 40%    | 59,81  | 713,28      | 1193%       | 10,38%     |
+
+If `runtime.GOMAXPROCS()` is set to a value higher than 1, the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
+
+Example of performance scaling on Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz - 4 physical cores, 8 logical cores. The example uses 10 blocks with 16MB data each and 4 parity blocks.
+
+| Threads | MB/s    | Speed |
+|---------|---------|-------|
+| 1       | 1355,11 | 100%  |
+| 2       | 2339,78 | 172%  |
+| 4       | 3179,33 | 235%  |
+| 8       | 4346,18 | 321%  |
+
+Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
+```
+benchmark                            all MB/s     data MB/s    speedup
+BenchmarkReconstruct10x2x10000-8     2011.67      10530.10     5.23x
+BenchmarkReconstruct50x5x50000-8     4585.41      14301.60     3.12x
+BenchmarkReconstruct10x2x1M-8        8081.15      28216.41     3.49x
+BenchmarkReconstruct5x2x1M-8         5780.07      28015.37     4.85x
+BenchmarkReconstruct10x4x1M-8        4352.56      14367.61     3.30x
+BenchmarkReconstruct50x20x1M-8       1364.35      4189.79      3.07x
+BenchmarkReconstruct10x4x16M-8       1484.35      5779.53      3.89x
+```
+
+# Performance on AVX512
+
+The performance on AVX512 has been accelerated for Intel CPUs. This gives speedups on a per-core basis of up to 4x compared to AVX2 as can be seen in the following table:
+
+```
+$ benchcmp avx2.txt avx512.txt
+benchmark                      AVX2 MB/s    AVX512 MB/s   speedup
+BenchmarkEncode8x8x1M-72       1681.35      4125.64       2.45x
+BenchmarkEncode8x4x8M-72       1529.36      5507.97       3.60x
+BenchmarkEncode8x8x8M-72        791.16      2952.29       3.73x
+BenchmarkEncode8x8x32M-72       573.26      2168.61       3.78x
+BenchmarkEncode12x4x12M-72     1234.41      4912.37       3.98x
+BenchmarkEncode16x4x16M-72     1189.59      5138.01       4.32x
+BenchmarkEncode24x8x24M-72      690.68      2583.70       3.74x
+BenchmarkEncode24x8x48M-72      674.20      2643.31       3.92x
+```
+
+This speedup has been achieved by computing multiple parity blocks in parallel as opposed to one after the other. In doing so it is possible to minimize the memory bandwidth required for loading all data shards. At the same time the calculations are performed in the 512-bit wide ZMM registers and the surplus of ZMM registers (32 in total) is used to keep more data around (most notably the matrix coefficients).
+
+# Performance on ARM64 NEON
+
+By exploiting NEON instructions the performance for ARM has been accelerated. Below are the performance numbers for a single core on an ARM Cortex-A53 CPU @ 1.2GHz (Debian 8.0 Jessie running Go: 1.7.4):
+
+| Data | Parity | Parity % | ARM64 Go MB/s | ARM64 NEON MB/s | NEON Speed |
+|------|--------|--------|--------------:|----------------:|-----------:|
+| 5    | 2      | 40%    |           189 |            1304 |       588% |
+| 10   | 2      | 20%    |           188 |            1738 |       925% |
+| 10   | 4      | 40%    |            96 |             839 |       877% |
+
+# Performance on ppc64le
+
+The performance for ppc64le has been accelerated. This gives roughly a 10x performance improvement on this architecture as can be seen below:
+
+```
+benchmark                      old MB/s     new MB/s     speedup
+BenchmarkGalois128K-160        948.87       8878.85      9.36x
+BenchmarkGalois1M-160          968.85       9041.92      9.33x
+BenchmarkGaloisXor128K-160     862.02       7905.00      9.17x
+BenchmarkGaloisXor1M-160       784.60       6296.65      8.03x
+```
+
+# asm2plan9s
+
+[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
+
+# Links
+* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
+* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
+* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
+* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
+* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
+* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
+* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
+* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
+
+# License
+
+This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.

+ 20 - 0
vendor/github.com/klauspost/reedsolomon/appveyor.yml

@@ -0,0 +1,20 @@
+os: Visual Studio 2015
+
+platform: x64
+
+clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
+
+# environment variables
+environment:
+  GOPATH: c:\gopath
+
+install:
+  - echo %PATH%
+  - echo %GOPATH%
+  - go version
+  - go env
+  - go get -d ./...
+
+build_script:
+  - go test -v -cpu=2 ./...
+  - go test -cpu=1,2,4 -short -race ./...

File diff suppressed because it is too large
+ 65 - 0
vendor/github.com/klauspost/reedsolomon/galois.go


+ 184 - 0
vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go

@@ -0,0 +1,184 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2019, Minio, Inc.
+
+package reedsolomon
+
+//go:noescape
+func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
+
+//go:noescape
+func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
+
+const (
+	dimIn        = 8                            // Number of input rows processed simultaneously
+	dimOut82     = 2                            // Number of output rows processed simultaneously for x2 routine
+	dimOut84     = 4                            // Number of output rows processed simultaneously for x4 routine
+	matrixSize82 = (16 + 16) * dimIn * dimOut82 // Dimension of slice of matrix coefficients passed into x2 routine
+	matrixSize84 = (16 + 16) * dimIn * dimOut84 // Dimension of slice of matrix coefficients passed into x4 routine
+)
+
+// Construct block of matrix coefficients for 2 output rows in parallel
+func setupMatrix82(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize82]byte) {
+	offset := 0
+	for c := inputOffset; c < inputOffset+dimIn; c++ {
+		for iRow := outputOffset; iRow < outputOffset+dimOut82; iRow++ {
+			if c < len(matrixRows[iRow]) {
+				coeff := matrixRows[iRow][c]
+				copy(matrix[offset*32:], mulTableLow[coeff][:])
+				copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
+			} else {
+				// coefficients not used for this input shard (so null out)
+				v := matrix[offset*32 : offset*32+32]
+				for i := range v {
+					v[i] = 0
+				}
+			}
+			offset += dimIn
+			if offset >= dimIn*dimOut82 {
+				offset -= dimIn*dimOut82 - 1
+			}
+		}
+	}
+}
+
+// Construct block of matrix coefficients for 4 output rows in parallel
+func setupMatrix84(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize84]byte) {
+	offset := 0
+	for c := inputOffset; c < inputOffset+dimIn; c++ {
+		for iRow := outputOffset; iRow < outputOffset+dimOut84; iRow++ {
+			if c < len(matrixRows[iRow]) {
+				coeff := matrixRows[iRow][c]
+				copy(matrix[offset*32:], mulTableLow[coeff][:])
+				copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
+			} else {
+				// coefficients not used for this input shard (so null out)
+				v := matrix[offset*32 : offset*32+32]
+				for i := range v {
+					v[i] = 0
+				}
+			}
+			offset += dimIn
+			if offset >= dimIn*dimOut84 {
+				offset -= dimIn*dimOut84 - 1
+			}
+		}
+	}
+}
+
+// Invoke AVX512 routine for 2 output rows in parallel
+func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset int) {
+	done := len(in[0])
+	if done == 0 {
+		return
+	}
+
+	inputEnd := inputOffset + dimIn
+	if inputEnd > len(in) {
+		inputEnd = len(in)
+	}
+	outputEnd := outputOffset + dimOut82
+	if outputEnd > len(out) {
+		outputEnd = len(out)
+	}
+
+	matrix82 := [matrixSize82]byte{}
+	setupMatrix82(matrixRows, inputOffset, outputOffset, &matrix82)
+	addTo := inputOffset != 0 // Except for the first input column, add to previous results
+	_galMulAVX512Parallel82(in[inputOffset:inputEnd], out[outputOffset:outputEnd], &matrix82, addTo)
+
+	done = (done >> 6) << 6
+	if len(in[0])-done == 0 {
+		return
+	}
+
+	for c := inputOffset; c < inputOffset+dimIn; c++ {
+		for iRow := outputOffset; iRow < outputOffset+dimOut82; iRow++ {
+			if c < len(matrixRows[iRow]) {
+				mt := mulTable[matrixRows[iRow][c]][:256]
+				for i := done; i < len(in[0]); i++ {
+					if c == 0 { // only set value for first input column
+						out[iRow][i] = mt[in[c][i]]
+					} else { // and add for all others
+						out[iRow][i] ^= mt[in[c][i]]
+					}
+				}
+			}
+		}
+	}
+}
+
+// Invoke AVX512 routine for 4 output rows in parallel
+func galMulAVX512Parallel84(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset int) {
+	done := len(in[0])
+	if done == 0 {
+		return
+	}
+
+	inputEnd := inputOffset + dimIn
+	if inputEnd > len(in) {
+		inputEnd = len(in)
+	}
+	outputEnd := outputOffset + dimOut84
+	if outputEnd > len(out) {
+		outputEnd = len(out)
+	}
+
+	matrix84 := [matrixSize84]byte{}
+	setupMatrix84(matrixRows, inputOffset, outputOffset, &matrix84)
+	addTo := inputOffset != 0 // Except for the first input column, add to previous results
+	_galMulAVX512Parallel84(in[inputOffset:inputEnd], out[outputOffset:outputEnd], &matrix84, addTo)
+
+	done = (done >> 6) << 6
+	if len(in[0])-done == 0 {
+		return
+	}
+
+	for c := inputOffset; c < inputOffset+dimIn; c++ {
+		for iRow := outputOffset; iRow < outputOffset+dimOut84; iRow++ {
+			if c < len(matrixRows[iRow]) {
+				mt := mulTable[matrixRows[iRow][c]][:256]
+				for i := done; i < len(in[0]); i++ {
+					if c == 0 { // only set value for first input column
+						out[iRow][i] = mt[in[c][i]]
+					} else { // and add for all others
+						out[iRow][i] ^= mt[in[c][i]]
+					}
+				}
+			}
+		}
+	}
+}
+
+// Perform the same as codeSomeShards, but taking advantage of
+// AVX512 parallelism for up to 4x faster execution as compared to AVX2
+func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	outputRow := 0
+	// First process (multiple) batches of 4 output rows in parallel
+	for ; outputRow+dimOut84 <= len(outputs); outputRow += dimOut84 {
+		for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+			galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow)
+		}
+	}
+	// Then process a (single) batch of 2 output rows in parallel
+	if outputRow+dimOut82 <= len(outputs) {
+		// fmt.Println(outputRow, len(outputs))
+		for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+			galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow)
+		}
+		outputRow += dimOut82
+	}
+	// Lastly, we may have a single output row left (for uneven parity)
+	if outputRow < len(outputs) {
+		for c := 0; c < r.DataShards; c++ {
+			if c == 0 {
+				galMulSlice(matrixRows[outputRow][c], inputs[c], outputs[outputRow], &r.o)
+			} else {
+				galMulSliceXor(matrixRows[outputRow][c], inputs[c], outputs[outputRow], &r.o)
+			}
+		}
+	}
+}
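For orientation, these AVX512 kernels are internal: applications only reach them through the package's exported Encoder, which picks the fastest available code path at runtime. A minimal usage sketch (assuming the public reedsolomon API: New, Split, Encode and Verify, with hypothetical shard counts and payload size; on CPUs where AVX512 is detected, parity generation is expected to run through codeSomeShardsAvx512 above):

	package main

	import (
		"log"

		"github.com/klauspost/reedsolomon"
	)

	func main() {
		// 10 data shards plus 3 parity shards.
		enc, err := reedsolomon.New(10, 3)
		if err != nil {
			log.Fatal(err)
		}
		// Split a (hypothetical) 1 MiB payload into data shards plus empty parity shards.
		shards, err := enc.Split(make([]byte, 1<<20))
		if err != nil {
			log.Fatal(err)
		}
		// Encode fills the 3 parity shards from the 10 data shards.
		if err := enc.Encode(shards); err != nil {
			log.Fatal(err)
		}
		ok, _ := enc.Verify(shards)
		log.Println("parity verified:", ok)
	}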

+ 590 - 0
vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s

@@ -0,0 +1,590 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2019, Minio, Inc.
+
+//
+// Process 2 output rows in parallel from a total of 8 input rows
+//
+// func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
+TEXT ·_galMulAVX512Parallel82(SB), 7, $0
+	MOVQ  in+0(FP), SI     //
+	MOVQ  8(SI), R9        // R9: len(in[0])
+	SHRQ  $6, R9           // len(in[0]) / 64
+	TESTQ R9, R9
+	JZ    done_avx512_parallel82
+
+	MOVQ matrix+48(FP), SI
+	LONG $0x48fee162; WORD $0x066f // VMOVDQU64 ZMM16, 0x000[rsi]
+	LONG $0x48fee162; WORD $0x4e6f; BYTE $0x01 // VMOVDQU64 ZMM17, 0x040[rsi]
+	LONG $0x48fee162; WORD $0x566f; BYTE $0x02 // VMOVDQU64 ZMM18, 0x080[rsi]
+	LONG $0x48fee162; WORD $0x5e6f; BYTE $0x03 // VMOVDQU64 ZMM19, 0x0c0[rsi]
+	LONG $0x48fee162; WORD $0x666f; BYTE $0x04 // VMOVDQU64 ZMM20, 0x100[rsi]
+	LONG $0x48fee162; WORD $0x6e6f; BYTE $0x05 // VMOVDQU64 ZMM21, 0x140[rsi]
+	LONG $0x48fee162; WORD $0x766f; BYTE $0x06 // VMOVDQU64 ZMM22, 0x180[rsi]
+	LONG $0x48fee162; WORD $0x7e6f; BYTE $0x07 // VMOVDQU64 ZMM23, 0x1c0[rsi]
+
+	MOVQ         $15, BX
+	MOVQ         BX, X5
+	LONG $0x487df262; WORD $0xd578 // VPBROADCASTB ZMM2, XMM5
+
+	MOVB addTo+56(FP), AX
+	LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1
+	WORD $0xf749; BYTE $0xe0 // mul r8
+	LONG $0x92fbe1c4; BYTE $0xc8 // kmovq k1, rax
+	MOVQ in+0(FP), SI  //  SI: &in
+	MOVQ in_len+8(FP), AX  // number of inputs
+	XORQ R11, R11
+	MOVQ out+24(FP), DX
+	MOVQ 24(DX), CX    //  CX: &out[1][0]
+	MOVQ (DX), DX      //  DX: &out[0][0]
+
+loopback_avx512_parallel82:
+	LONG $0xc9fef162; WORD $0x226f // VMOVDQU64 ZMM4{k1}{z}, [rdx]
+	LONG $0xc9fef162; WORD $0x296f // VMOVDQU64 ZMM5{k1}{z}, [rcx]
+
+	MOVQ (SI), BX      //  BX: &in[0][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40fd3362; WORD $0xf043; BYTE $0x00 // VSHUFI64x2 ZMM14, ZMM16, ZMM16, 0x00
+	LONG $0x40fd3362; WORD $0xf843; BYTE $0x55 // VSHUFI64x2 ZMM15, ZMM16, ZMM16, 0x55
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40dd3362; WORD $0xe443; BYTE $0x00 // VSHUFI64x2 ZMM12, ZMM20, ZMM20, 0x00
+	LONG $0x40dd3362; WORD $0xec43; BYTE $0x55 // VSHUFI64x2 ZMM13, ZMM20, ZMM20, 0x55
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	CMPQ AX, $1
+	JE skip_avx512_parallel82
+
+	MOVQ 24(SI), BX    //  BX: &in[1][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40fd3362; WORD $0xf043; BYTE $0xaa // VSHUFI64x2 ZMM14, ZMM16, ZMM16, 0xaa
+	LONG $0x40fd3362; WORD $0xf843; BYTE $0xff // VSHUFI64x2 ZMM15, ZMM16, ZMM16, 0xff
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40dd3362; WORD $0xe443; BYTE $0xaa // VSHUFI64x2 ZMM12, ZMM20, ZMM20, 0xaa
+	LONG $0x40dd3362; WORD $0xec43; BYTE $0xff // VSHUFI64x2 ZMM13, ZMM20, ZMM20, 0xff
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	CMPQ AX, $2
+	JE skip_avx512_parallel82
+
+	MOVQ 48(SI), BX    //  BX: &in[2][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40f53362; WORD $0xf143; BYTE $0x00 // VSHUFI64x2 ZMM14, ZMM17, ZMM17, 0x00
+	LONG $0x40f53362; WORD $0xf943; BYTE $0x55 // VSHUFI64x2 ZMM15, ZMM17, ZMM17, 0x55
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40d53362; WORD $0xe543; BYTE $0x00 // VSHUFI64x2 ZMM12, ZMM21, ZMM21, 0x00
+	LONG $0x40d53362; WORD $0xed43; BYTE $0x55 // VSHUFI64x2 ZMM13, ZMM21, ZMM21, 0x55
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	CMPQ AX, $3
+	JE skip_avx512_parallel82
+
+	MOVQ 72(SI), BX    // BX: &in[3][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40f53362; WORD $0xf143; BYTE $0xaa // VSHUFI64x2 ZMM14, ZMM17, ZMM17, 0xaa
+	LONG $0x40f53362; WORD $0xf943; BYTE $0xff // VSHUFI64x2 ZMM15, ZMM17, ZMM17, 0xff
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40d53362; WORD $0xe543; BYTE $0xaa // VSHUFI64x2 ZMM12, ZMM21, ZMM21, 0xaa
+	LONG $0x40d53362; WORD $0xed43; BYTE $0xff // VSHUFI64x2 ZMM13, ZMM21, ZMM21, 0xff
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	CMPQ AX, $4
+	JE skip_avx512_parallel82
+
+	MOVQ 96(SI), BX    // BX: &in[4][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40ed3362; WORD $0xf243; BYTE $0x00 // VSHUFI64x2 ZMM14, ZMM18, ZMM18, 0x00
+	LONG $0x40ed3362; WORD $0xfa43; BYTE $0x55 // VSHUFI64x2 ZMM15, ZMM18, ZMM18, 0x55
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40cd3362; WORD $0xe643; BYTE $0x00 // VSHUFI64x2 ZMM12, ZMM22, ZMM22, 0x00
+	LONG $0x40cd3362; WORD $0xee43; BYTE $0x55 // VSHUFI64x2 ZMM13, ZMM22, ZMM22, 0x55
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	CMPQ AX, $5
+	JE skip_avx512_parallel82
+
+	MOVQ 120(SI), BX   // BX: &in[5][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40ed3362; WORD $0xf243; BYTE $0xaa // VSHUFI64x2 ZMM14, ZMM18, ZMM18, 0xaa
+	LONG $0x40ed3362; WORD $0xfa43; BYTE $0xff // VSHUFI64x2 ZMM15, ZMM18, ZMM18, 0xff
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40cd3362; WORD $0xe643; BYTE $0xaa // VSHUFI64x2 ZMM12, ZMM22, ZMM22, 0xaa
+	LONG $0x40cd3362; WORD $0xee43; BYTE $0xff // VSHUFI64x2 ZMM13, ZMM22, ZMM22, 0xff
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	CMPQ AX, $6
+	JE skip_avx512_parallel82
+
+	MOVQ 144(SI), BX   // BX: &in[6][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40e53362; WORD $0xf343; BYTE $0x00 // VSHUFI64x2 ZMM14, ZMM19, ZMM19, 0x00
+	LONG $0x40e53362; WORD $0xfb43; BYTE $0x55 // VSHUFI64x2 ZMM15, ZMM19, ZMM19, 0x55
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40c53362; WORD $0xe743; BYTE $0x00 // VSHUFI64x2 ZMM12, ZMM23, ZMM23, 0x00
+	LONG $0x40c53362; WORD $0xef43; BYTE $0x55 // VSHUFI64x2 ZMM13, ZMM23, ZMM23, 0x55
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	CMPQ AX, $7
+	JE skip_avx512_parallel82
+
+	MOVQ 168(SI), BX   //  BX: &in[7][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40e53362; WORD $0xf343; BYTE $0xaa // VSHUFI64x2 ZMM14, ZMM19, ZMM19, 0xaa
+	LONG $0x40e53362; WORD $0xfb43; BYTE $0xff // VSHUFI64x2 ZMM15, ZMM19, ZMM19, 0xff
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40c53362; WORD $0xe743; BYTE $0xaa // VSHUFI64x2 ZMM12, ZMM23, ZMM23, 0xaa
+	LONG $0x40c53362; WORD $0xef43; BYTE $0xff // VSHUFI64x2 ZMM13, ZMM23, ZMM23, 0xff
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+skip_avx512_parallel82:
+	LONG $0x48fef162; WORD $0x227f // VMOVDQU64 [rdx], ZMM4
+	LONG $0x48fef162; WORD $0x297f // VMOVDQU64 [rcx], ZMM5
+
+	ADDQ $64, R11 // input offset += 64
+
+	ADDQ $64, DX  // out+=64
+	ADDQ $64, CX  // out2+=64
+
+	SUBQ $1, R9
+	JNZ  loopback_avx512_parallel82
+
+done_avx512_parallel82:
+	VZEROUPPER
+	RET
+
+//
+// Process 4 output rows in parallel from a total of 8 input rows
+//
+// func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
+TEXT ·_galMulAVX512Parallel84(SB), 7, $0
+	MOVQ  in+0(FP), SI     //
+	MOVQ  8(SI), R9        // R9: len(in[0])
+	SHRQ  $6, R9           // len(in[0]) / 64
+	TESTQ R9, R9
+	JZ    done_avx512_parallel84
+
+	MOVQ matrix+48(FP), SI
+	LONG $0x48fee162; WORD $0x066f // VMOVDQU64 ZMM16, 0x000[rsi]
+	LONG $0x48fee162; WORD $0x4e6f; BYTE $0x01 // VMOVDQU64 ZMM17, 0x040[rsi]
+	LONG $0x48fee162; WORD $0x566f; BYTE $0x02 // VMOVDQU64 ZMM18, 0x080[rsi]
+	LONG $0x48fee162; WORD $0x5e6f; BYTE $0x03 // VMOVDQU64 ZMM19, 0x0c0[rsi]
+	LONG $0x48fee162; WORD $0x666f; BYTE $0x04 // VMOVDQU64 ZMM20, 0x100[rsi]
+	LONG $0x48fee162; WORD $0x6e6f; BYTE $0x05 // VMOVDQU64 ZMM21, 0x140[rsi]
+	LONG $0x48fee162; WORD $0x766f; BYTE $0x06 // VMOVDQU64 ZMM22, 0x180[rsi]
+	LONG $0x48fee162; WORD $0x7e6f; BYTE $0x07 // VMOVDQU64 ZMM23, 0x1c0[rsi]
+	LONG $0x48fe6162; WORD $0x466f; BYTE $0x08 // VMOVDQU64 ZMM24, 0x200[rsi]
+	LONG $0x48fe6162; WORD $0x4e6f; BYTE $0x09 // VMOVDQU64 ZMM25, 0x240[rsi]
+	LONG $0x48fe6162; WORD $0x566f; BYTE $0x0a // VMOVDQU64 ZMM26, 0x280[rsi]
+	LONG $0x48fe6162; WORD $0x5e6f; BYTE $0x0b // VMOVDQU64 ZMM27, 0x2c0[rsi]
+	LONG $0x48fe6162; WORD $0x666f; BYTE $0x0c // VMOVDQU64 ZMM28, 0x300[rsi]
+	LONG $0x48fe6162; WORD $0x6e6f; BYTE $0x0d // VMOVDQU64 ZMM29, 0x340[rsi]
+	LONG $0x48fe6162; WORD $0x766f; BYTE $0x0e // VMOVDQU64 ZMM30, 0x380[rsi]
+	LONG $0x48fe6162; WORD $0x7e6f; BYTE $0x0f // VMOVDQU64 ZMM31, 0x3c0[rsi]
+
+	MOVQ         $15, BX
+	MOVQ         BX, X5
+	LONG $0x487df262; WORD $0xd578 // VPBROADCASTB ZMM2, XMM5
+
+	MOVB addTo+56(FP), AX
+	LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1
+	WORD $0xf749; BYTE $0xe0 // mul r8
+	LONG $0x92fbe1c4; BYTE $0xc8 // kmovq k1, rax
+	MOVQ in+0(FP), SI  //  SI: &in
+	MOVQ in_len+8(FP), AX  // number of inputs
+	XORQ R11, R11
+	MOVQ out+24(FP), DX
+	MOVQ 24(DX), CX    //  CX: &out[1][0]
+	MOVQ 48(DX), R10   // R10: &out[2][0]
+	MOVQ 72(DX), R12   // R12: &out[3][0]
+	MOVQ (DX), DX      //  DX: &out[0][0]
+
+loopback_avx512_parallel84:
+	LONG $0xc9fef162; WORD $0x226f // VMOVDQU64 ZMM4{k1}{z}, [rdx]
+	LONG $0xc9fef162; WORD $0x296f // VMOVDQU64 ZMM5{k1}{z}, [rcx]
+	LONG $0xc9fed162; WORD $0x326f // VMOVDQU64 ZMM6{k1}{z}, [r10]
+	LONG $0xc9fed162; WORD $0x3c6f; BYTE $0x24 // VMOVDQU64 ZMM7{k1}{z}, [r12]
+
+	MOVQ (SI), BX      //  BX: &in[0][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40fd3362; WORD $0xf043; BYTE $0x00 // VSHUFI64x2 ZMM14, ZMM16, ZMM16, 0x00
+	LONG $0x40fd3362; WORD $0xf843; BYTE $0x55 // VSHUFI64x2 ZMM15, ZMM16, ZMM16, 0x55
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40dd3362; WORD $0xe443; BYTE $0x00 // VSHUFI64x2 ZMM12, ZMM20, ZMM20, 0x00
+	LONG $0x40dd3362; WORD $0xec43; BYTE $0x55 // VSHUFI64x2 ZMM13, ZMM20, ZMM20, 0x55
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	LONG $0x40bd1362; WORD $0xd043; BYTE $0x00 // VSHUFI64x2 ZMM10, ZMM24, ZMM24, 0x00
+	LONG $0x40bd1362; WORD $0xd843; BYTE $0x55 // VSHUFI64x2 ZMM11, ZMM24, ZMM24, 0x55
+	LONG $0x482d7262; WORD $0xd000 // VPSHUFB  ZMM10, ZMM10, ZMM0  ; mul low part
+	LONG $0x48257262; WORD $0xd900 // VPSHUFB  ZMM11, ZMM11, ZMM1  ; mul high part
+	LONG $0x48ad5162; WORD $0xd3ef // VPXORQ   ZMM10, ZMM10, ZMM11  ; result
+	LONG $0x48cdd162; WORD $0xf2ef // VPXORQ   ZMM6, ZMM6, ZMM10
+
+	LONG $0x409d1362; WORD $0xc443; BYTE $0x00 // VSHUFI64x2 ZMM8, ZMM28, ZMM28, 0x00
+	LONG $0x409d1362; WORD $0xcc43; BYTE $0x55 // VSHUFI64x2 ZMM9, ZMM28, ZMM28, 0x55
+	LONG $0x483d7262; WORD $0xc000 // VPSHUFB  ZMM8, ZMM8, ZMM0  ; mul low part
+	LONG $0x48357262; WORD $0xc900 // VPSHUFB  ZMM9, ZMM9, ZMM1  ; mul high part
+	LONG $0x48bd5162; WORD $0xc1ef // VPXORQ   ZMM8, ZMM8, ZMM9  ; result
+	LONG $0x48c5d162; WORD $0xf8ef // VPXORQ   ZMM7, ZMM7, ZMM8
+
+	CMPQ AX, $1
+	JE skip_avx512_parallel84
+
+	MOVQ 24(SI), BX    //  BX: &in[1][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40fd3362; WORD $0xf043; BYTE $0xaa // VSHUFI64x2 ZMM14, ZMM16, ZMM16, 0xaa
+	LONG $0x40fd3362; WORD $0xf843; BYTE $0xff // VSHUFI64x2 ZMM15, ZMM16, ZMM16, 0xff
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40dd3362; WORD $0xe443; BYTE $0xaa // VSHUFI64x2 ZMM12, ZMM20, ZMM20, 0xaa
+	LONG $0x40dd3362; WORD $0xec43; BYTE $0xff // VSHUFI64x2 ZMM13, ZMM20, ZMM20, 0xff
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	LONG $0x40bd1362; WORD $0xd043; BYTE $0xaa // VSHUFI64x2 ZMM10, ZMM24, ZMM24, 0xaa
+	LONG $0x40bd1362; WORD $0xd843; BYTE $0xff // VSHUFI64x2 ZMM11, ZMM24, ZMM24, 0xff
+	LONG $0x482d7262; WORD $0xd000 // VPSHUFB  ZMM10, ZMM10, ZMM0  ; mul low part
+	LONG $0x48257262; WORD $0xd900 // VPSHUFB  ZMM11, ZMM11, ZMM1  ; mul high part
+	LONG $0x48ad5162; WORD $0xd3ef // VPXORQ   ZMM10, ZMM10, ZMM11  ; result
+	LONG $0x48cdd162; WORD $0xf2ef // VPXORQ   ZMM6, ZMM6, ZMM10
+
+	LONG $0x409d1362; WORD $0xc443; BYTE $0xaa // VSHUFI64x2 ZMM8, ZMM28, ZMM28, 0xaa
+	LONG $0x409d1362; WORD $0xcc43; BYTE $0xff // VSHUFI64x2 ZMM9, ZMM28, ZMM28, 0xff
+	LONG $0x483d7262; WORD $0xc000 // VPSHUFB  ZMM8, ZMM8, ZMM0  ; mul low part
+	LONG $0x48357262; WORD $0xc900 // VPSHUFB  ZMM9, ZMM9, ZMM1  ; mul high part
+	LONG $0x48bd5162; WORD $0xc1ef // VPXORQ   ZMM8, ZMM8, ZMM9  ; result
+	LONG $0x48c5d162; WORD $0xf8ef // VPXORQ   ZMM7, ZMM7, ZMM8
+
+	CMPQ AX, $2
+	JE skip_avx512_parallel84
+
+	MOVQ 48(SI), BX    //  BX: &in[2][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40f53362; WORD $0xf143; BYTE $0x00 // VSHUFI64x2 ZMM14, ZMM17, ZMM17, 0x00
+	LONG $0x40f53362; WORD $0xf943; BYTE $0x55 // VSHUFI64x2 ZMM15, ZMM17, ZMM17, 0x55
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40d53362; WORD $0xe543; BYTE $0x00 // VSHUFI64x2 ZMM12, ZMM21, ZMM21, 0x00
+	LONG $0x40d53362; WORD $0xed43; BYTE $0x55 // VSHUFI64x2 ZMM13, ZMM21, ZMM21, 0x55
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	LONG $0x40b51362; WORD $0xd143; BYTE $0x00 // VSHUFI64x2 ZMM10, ZMM25, ZMM25, 0x00
+	LONG $0x40b51362; WORD $0xd943; BYTE $0x55 // VSHUFI64x2 ZMM11, ZMM25, ZMM25, 0x55
+	LONG $0x482d7262; WORD $0xd000 // VPSHUFB  ZMM10, ZMM10, ZMM0  ; mul low part
+	LONG $0x48257262; WORD $0xd900 // VPSHUFB  ZMM11, ZMM11, ZMM1  ; mul high part
+	LONG $0x48ad5162; WORD $0xd3ef // VPXORQ   ZMM10, ZMM10, ZMM11  ; result
+	LONG $0x48cdd162; WORD $0xf2ef // VPXORQ   ZMM6, ZMM6, ZMM10
+
+	LONG $0x40951362; WORD $0xc543; BYTE $0x00 // VSHUFI64x2 ZMM8, ZMM29, ZMM29, 0x00
+	LONG $0x40951362; WORD $0xcd43; BYTE $0x55 // VSHUFI64x2 ZMM9, ZMM29, ZMM29, 0x55
+	LONG $0x483d7262; WORD $0xc000 // VPSHUFB  ZMM8, ZMM8, ZMM0  ; mul low part
+	LONG $0x48357262; WORD $0xc900 // VPSHUFB  ZMM9, ZMM9, ZMM1  ; mul high part
+	LONG $0x48bd5162; WORD $0xc1ef // VPXORQ   ZMM8, ZMM8, ZMM9  ; result
+	LONG $0x48c5d162; WORD $0xf8ef // VPXORQ   ZMM7, ZMM7, ZMM8
+
+	CMPQ AX, $3
+	JE skip_avx512_parallel84
+
+	MOVQ 72(SI), BX    // BX: &in[3][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40f53362; WORD $0xf143; BYTE $0xaa // VSHUFI64x2 ZMM14, ZMM17, ZMM17, 0xaa
+	LONG $0x40f53362; WORD $0xf943; BYTE $0xff // VSHUFI64x2 ZMM15, ZMM17, ZMM17, 0xff
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40d53362; WORD $0xe543; BYTE $0xaa // VSHUFI64x2 ZMM12, ZMM21, ZMM21, 0xaa
+	LONG $0x40d53362; WORD $0xed43; BYTE $0xff // VSHUFI64x2 ZMM13, ZMM21, ZMM21, 0xff
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	LONG $0x40b51362; WORD $0xd143; BYTE $0xaa // VSHUFI64x2 ZMM10, ZMM25, ZMM25, 0xaa
+	LONG $0x40b51362; WORD $0xd943; BYTE $0xff // VSHUFI64x2 ZMM11, ZMM25, ZMM25, 0xff
+	LONG $0x482d7262; WORD $0xd000 // VPSHUFB  ZMM10, ZMM10, ZMM0  ; mul low part
+	LONG $0x48257262; WORD $0xd900 // VPSHUFB  ZMM11, ZMM11, ZMM1  ; mul high part
+	LONG $0x48ad5162; WORD $0xd3ef // VPXORQ   ZMM10, ZMM10, ZMM11  ; result
+	LONG $0x48cdd162; WORD $0xf2ef // VPXORQ   ZMM6, ZMM6, ZMM10
+
+	LONG $0x40951362; WORD $0xc543; BYTE $0xaa // VSHUFI64x2 ZMM8, ZMM29, ZMM29, 0xaa
+	LONG $0x40951362; WORD $0xcd43; BYTE $0xff // VSHUFI64x2 ZMM9, ZMM29, ZMM29, 0xff
+	LONG $0x483d7262; WORD $0xc000 // VPSHUFB  ZMM8, ZMM8, ZMM0  ; mul low part
+	LONG $0x48357262; WORD $0xc900 // VPSHUFB  ZMM9, ZMM9, ZMM1  ; mul high part
+	LONG $0x48bd5162; WORD $0xc1ef // VPXORQ   ZMM8, ZMM8, ZMM9  ; result
+	LONG $0x48c5d162; WORD $0xf8ef // VPXORQ   ZMM7, ZMM7, ZMM8
+
+	CMPQ AX, $4
+	JE skip_avx512_parallel84
+
+	MOVQ 96(SI), BX    // BX: &in[4][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40ed3362; WORD $0xf243; BYTE $0x00 // VSHUFI64x2 ZMM14, ZMM18, ZMM18, 0x00
+	LONG $0x40ed3362; WORD $0xfa43; BYTE $0x55 // VSHUFI64x2 ZMM15, ZMM18, ZMM18, 0x55
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40cd3362; WORD $0xe643; BYTE $0x00 // VSHUFI64x2 ZMM12, ZMM22, ZMM22, 0x00
+	LONG $0x40cd3362; WORD $0xee43; BYTE $0x55 // VSHUFI64x2 ZMM13, ZMM22, ZMM22, 0x55
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	LONG $0x40ad1362; WORD $0xd243; BYTE $0x00 // VSHUFI64x2 ZMM10, ZMM26, ZMM26, 0x00
+	LONG $0x40ad1362; WORD $0xda43; BYTE $0x55 // VSHUFI64x2 ZMM11, ZMM26, ZMM26, 0x55
+	LONG $0x482d7262; WORD $0xd000 // VPSHUFB  ZMM10, ZMM10, ZMM0  ; mul low part
+	LONG $0x48257262; WORD $0xd900 // VPSHUFB  ZMM11, ZMM11, ZMM1  ; mul high part
+	LONG $0x48ad5162; WORD $0xd3ef // VPXORQ   ZMM10, ZMM10, ZMM11  ; result
+	LONG $0x48cdd162; WORD $0xf2ef // VPXORQ   ZMM6, ZMM6, ZMM10
+
+	LONG $0x408d1362; WORD $0xc643; BYTE $0x00 // VSHUFI64x2 ZMM8, ZMM30, ZMM30, 0x00
+	LONG $0x408d1362; WORD $0xce43; BYTE $0x55 // VSHUFI64x2 ZMM9, ZMM30, ZMM30, 0x55
+	LONG $0x483d7262; WORD $0xc000 // VPSHUFB  ZMM8, ZMM8, ZMM0  ; mul low part
+	LONG $0x48357262; WORD $0xc900 // VPSHUFB  ZMM9, ZMM9, ZMM1  ; mul high part
+	LONG $0x48bd5162; WORD $0xc1ef // VPXORQ   ZMM8, ZMM8, ZMM9  ; result
+	LONG $0x48c5d162; WORD $0xf8ef // VPXORQ   ZMM7, ZMM7, ZMM8
+
+	CMPQ AX, $5
+	JE skip_avx512_parallel84
+
+	MOVQ 120(SI), BX   // BX: &in[5][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40ed3362; WORD $0xf243; BYTE $0xaa // VSHUFI64x2 ZMM14, ZMM18, ZMM18, 0xaa
+	LONG $0x40ed3362; WORD $0xfa43; BYTE $0xff // VSHUFI64x2 ZMM15, ZMM18, ZMM18, 0xff
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40cd3362; WORD $0xe643; BYTE $0xaa // VSHUFI64x2 ZMM12, ZMM22, ZMM22, 0xaa
+	LONG $0x40cd3362; WORD $0xee43; BYTE $0xff // VSHUFI64x2 ZMM13, ZMM22, ZMM22, 0xff
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	LONG $0x40ad1362; WORD $0xd243; BYTE $0xaa // VSHUFI64x2 ZMM10, ZMM26, ZMM26, 0xaa
+	LONG $0x40ad1362; WORD $0xda43; BYTE $0xff // VSHUFI64x2 ZMM11, ZMM26, ZMM26, 0xff
+	LONG $0x482d7262; WORD $0xd000 // VPSHUFB  ZMM10, ZMM10, ZMM0  ; mul low part
+	LONG $0x48257262; WORD $0xd900 // VPSHUFB  ZMM11, ZMM11, ZMM1  ; mul high part
+	LONG $0x48ad5162; WORD $0xd3ef // VPXORQ   ZMM10, ZMM10, ZMM11  ; result
+	LONG $0x48cdd162; WORD $0xf2ef // VPXORQ   ZMM6, ZMM6, ZMM10
+
+	LONG $0x408d1362; WORD $0xc643; BYTE $0xaa // VSHUFI64x2 ZMM8, ZMM30, ZMM30, 0xaa
+	LONG $0x408d1362; WORD $0xce43; BYTE $0xff // VSHUFI64x2 ZMM9, ZMM30, ZMM30, 0xff
+	LONG $0x483d7262; WORD $0xc000 // VPSHUFB  ZMM8, ZMM8, ZMM0  ; mul low part
+	LONG $0x48357262; WORD $0xc900 // VPSHUFB  ZMM9, ZMM9, ZMM1  ; mul high part
+	LONG $0x48bd5162; WORD $0xc1ef // VPXORQ   ZMM8, ZMM8, ZMM9  ; result
+	LONG $0x48c5d162; WORD $0xf8ef // VPXORQ   ZMM7, ZMM7, ZMM8
+
+	CMPQ AX, $6
+	JE skip_avx512_parallel84
+
+	MOVQ 144(SI), BX   // BX: &in[6][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40e53362; WORD $0xf343; BYTE $0x00 // VSHUFI64x2 ZMM14, ZMM19, ZMM19, 0x00
+	LONG $0x40e53362; WORD $0xfb43; BYTE $0x55 // VSHUFI64x2 ZMM15, ZMM19, ZMM19, 0x55
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40c53362; WORD $0xe743; BYTE $0x00 // VSHUFI64x2 ZMM12, ZMM23, ZMM23, 0x00
+	LONG $0x40c53362; WORD $0xef43; BYTE $0x55 // VSHUFI64x2 ZMM13, ZMM23, ZMM23, 0x55
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	LONG $0x40a51362; WORD $0xd343; BYTE $0x00 // VSHUFI64x2 ZMM10, ZMM27, ZMM27, 0x00
+	LONG $0x40a51362; WORD $0xdb43; BYTE $0x55 // VSHUFI64x2 ZMM11, ZMM27, ZMM27, 0x55
+	LONG $0x482d7262; WORD $0xd000 // VPSHUFB  ZMM10, ZMM10, ZMM0  ; mul low part
+	LONG $0x48257262; WORD $0xd900 // VPSHUFB  ZMM11, ZMM11, ZMM1  ; mul high part
+	LONG $0x48ad5162; WORD $0xd3ef // VPXORQ   ZMM10, ZMM10, ZMM11  ; result
+	LONG $0x48cdd162; WORD $0xf2ef // VPXORQ   ZMM6, ZMM6, ZMM10
+
+	LONG $0x40851362; WORD $0xc743; BYTE $0x00 // VSHUFI64x2 ZMM8, ZMM31, ZMM31, 0x00
+	LONG $0x40851362; WORD $0xcf43; BYTE $0x55 // VSHUFI64x2 ZMM9, ZMM31, ZMM31, 0x55
+	LONG $0x483d7262; WORD $0xc000 // VPSHUFB  ZMM8, ZMM8, ZMM0  ; mul low part
+	LONG $0x48357262; WORD $0xc900 // VPSHUFB  ZMM9, ZMM9, ZMM1  ; mul high part
+	LONG $0x48bd5162; WORD $0xc1ef // VPXORQ   ZMM8, ZMM8, ZMM9  ; result
+	LONG $0x48c5d162; WORD $0xf8ef // VPXORQ   ZMM7, ZMM7, ZMM8
+
+	CMPQ AX, $7
+	JE skip_avx512_parallel84
+
+	MOVQ 168(SI), BX   //  BX: &in[7][0]
+	LONG $0x48feb162; WORD $0x046f; BYTE $0x1b // VMOVDQU64 ZMM0, [rbx+r11]
+	LONG $0x40e53362; WORD $0xf343; BYTE $0xaa // VSHUFI64x2 ZMM14, ZMM19, ZMM19, 0xaa
+	LONG $0x40e53362; WORD $0xfb43; BYTE $0xff // VSHUFI64x2 ZMM15, ZMM19, ZMM19, 0xff
+	LONG $0x48f5f162; WORD $0xd073; BYTE $0x04 // VPSRLQ   ZMM1, ZMM0, 4     ; high input
+	LONG $0x48fdf162; WORD $0xc2db // VPANDQ   ZMM0, ZMM0, ZMM2  ; low input
+	LONG $0x48f5f162; WORD $0xcadb // VPANDQ   ZMM1, ZMM1, ZMM2  ; high input
+	LONG $0x480d7262; WORD $0xf000 // VPSHUFB  ZMM14, ZMM14, ZMM0  ; mul low part
+	LONG $0x48057262; WORD $0xf900 // VPSHUFB  ZMM15, ZMM15, ZMM1  ; mul high part
+	LONG $0x488d5162; WORD $0xf7ef // VPXORQ   ZMM14, ZMM14, ZMM15  ; result
+	LONG $0x48ddd162; WORD $0xe6ef // VPXORQ   ZMM4, ZMM4, ZMM14
+
+	LONG $0x40c53362; WORD $0xe743; BYTE $0xaa // VSHUFI64x2 ZMM12, ZMM23, ZMM23, 0xaa
+	LONG $0x40c53362; WORD $0xef43; BYTE $0xff // VSHUFI64x2 ZMM13, ZMM23, ZMM23, 0xff
+	LONG $0x481d7262; WORD $0xe000 // VPSHUFB  ZMM12, ZMM12, ZMM0  ; mul low part
+	LONG $0x48157262; WORD $0xe900 // VPSHUFB  ZMM13, ZMM13, ZMM1  ; mul high part
+	LONG $0x489d5162; WORD $0xe5ef // VPXORQ   ZMM12, ZMM12, ZMM13  ; result
+	LONG $0x48d5d162; WORD $0xecef // VPXORQ   ZMM5, ZMM5, ZMM12
+
+	LONG $0x40a51362; WORD $0xd343; BYTE $0xaa // VSHUFI64x2 ZMM10, ZMM27, ZMM27, 0xaa
+	LONG $0x40a51362; WORD $0xdb43; BYTE $0xff // VSHUFI64x2 ZMM11, ZMM27, ZMM27, 0xff
+	LONG $0x482d7262; WORD $0xd000 // VPSHUFB  ZMM10, ZMM10, ZMM0  ; mul low part
+	LONG $0x48257262; WORD $0xd900 // VPSHUFB  ZMM11, ZMM11, ZMM1  ; mul high part
+	LONG $0x48ad5162; WORD $0xd3ef // VPXORQ   ZMM10, ZMM10, ZMM11  ; result
+	LONG $0x48cdd162; WORD $0xf2ef // VPXORQ   ZMM6, ZMM6, ZMM10
+
+	LONG $0x40851362; WORD $0xc743; BYTE $0xaa // VSHUFI64x2 ZMM8, ZMM31, ZMM31, 0xaa
+	LONG $0x40851362; WORD $0xcf43; BYTE $0xff // VSHUFI64x2 ZMM9, ZMM31, ZMM31, 0xff
+	LONG $0x483d7262; WORD $0xc000 // VPSHUFB  ZMM8, ZMM8, ZMM0  ; mul low part
+	LONG $0x48357262; WORD $0xc900 // VPSHUFB  ZMM9, ZMM9, ZMM1  ; mul high part
+	LONG $0x48bd5162; WORD $0xc1ef // VPXORQ   ZMM8, ZMM8, ZMM9  ; result
+	LONG $0x48c5d162; WORD $0xf8ef // VPXORQ   ZMM7, ZMM7, ZMM8
+
+skip_avx512_parallel84:
+	LONG $0x48fef162; WORD $0x227f // VMOVDQU64 [rdx], ZMM4
+	LONG $0x48fef162; WORD $0x297f // VMOVDQU64 [rcx], ZMM5
+	LONG $0x48fed162; WORD $0x327f // VMOVDQU64 [r10], ZMM6
+	LONG $0x48fed162; WORD $0x3c7f; BYTE $0x24 // VMOVDQU64 [r12], ZMM7
+
+	ADDQ $64, R11 // input offset += 64
+
+	ADDQ $64, DX  // out+=64
+	ADDQ $64, CX  // out2+=64
+	ADDQ $64, R10 // out3+=64
+	ADDQ $64, R12 // out4+=64
+
+	SUBQ $1, R9
+	JNZ  loopback_avx512_parallel84
+
+done_avx512_parallel84:
+	VZEROUPPER
+	RET

+ 92 - 0
vendor/github.com/klauspost/reedsolomon/galois_amd64.go

@@ -0,0 +1,92 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+//go:noescape
+func galMulSSSE3(low, high, in, out []byte)
+
+//go:noescape
+func galMulSSSE3Xor(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2Xor(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2(low, high, in, out []byte)
+
+//go:noescape
+func sSE2XorSlice(in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulSSSE3(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] = low[l] ^ high[h]
+	}
+}
+
+func galMulSSSE3Xor(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] ^= low[l] ^ high[h]
+	}
+}
+*/
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	var done int
+	if o.useAVX2 {
+		galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done = (len(in) >> 5) << 5
+	} else if o.useSSSE3 {
+		galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done = (len(in) >> 4) << 4
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] = mt[in[i]]
+		}
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	var done int
+	if o.useAVX2 {
+		galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done = (len(in) >> 5) << 5
+	} else if o.useSSSE3 {
+		galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done = (len(in) >> 4) << 4
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] ^= mt[in[i]]
+		}
+	}
+}
+
+// slice galois add
+func sliceXor(in, out []byte, sse2 bool) {
+	var done int
+	if sse2 {
+		sSE2XorSlice(in, out)
+		done = (len(in) >> 4) << 4
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		for i := done; i < len(in); i++ {
+			out[i] ^= in[i]
+		}
+	}
+}
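Both the SIMD kernels and the scalar tails above rely on the same identity: multiplying a byte by a coefficient c is a lookup of its low nibble in mulTableLow[c] and of its high nibble in mulTableHigh[c], XORed together, and that must agree with the full mulTable[c]. A small in-package sanity check sketching this equivalence (not part of the library; it only assumes the three tables already referenced above):

	// checkSplitTables reports whether the nibble-split tables agree with the
	// full 256x256 multiplication table for every coefficient and input byte.
	func checkSplitTables() bool {
		for c := 0; c < 256; c++ {
			for v := 0; v < 256; v++ {
				split := mulTableLow[c][v&0xf] ^ mulTableHigh[c][v>>4]
				if split != mulTable[c][v] {
					return false
				}
			}
		}
		return true
	}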

+ 236 - 0
vendor/github.com/klauspost/reedsolomon/galois_amd64.s

@@ -0,0 +1,236 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
+// and http://jerasure.org/jerasure/gf-complete/tree/master
+
+// func galMulSSSE3Xor(low, high, in, out []byte)
+TEXT ·galMulSSSE3Xor(SB), 7, $0
+	MOVQ   low+0(FP), SI     // SI: &low
+	MOVQ   high+24(FP), DX   // DX: &high
+	MOVOU  (SI), X6          // X6: low
+	MOVOU  (DX), X7          // X7: high
+	MOVQ   $15, BX           // BX: low mask
+	MOVQ   BX, X8
+	PXOR   X5, X5
+	MOVQ   in+48(FP), SI     // SI: &in
+	MOVQ   in_len+56(FP), R9 // R9: len(in)
+	MOVQ   out+72(FP), DX    // DX: &out
+	PSHUFB X5, X8            // X8: lomask (unpacked)
+	SHRQ   $4, R9            // len(in) / 16
+	MOVQ   SI, AX
+	MOVQ   DX, BX
+	ANDQ   $15, AX
+	ANDQ   $15, BX
+	CMPQ   R9, $0
+	JEQ    done_xor
+	ORQ    AX, BX
+	CMPQ   BX, $0
+	JNZ    loopback_xor
+
+loopback_xor_aligned:
+	MOVOA  (SI), X0             // in[x]
+	MOVOA  (DX), X4             // out[x]
+	MOVOA  X0, X1               // in[x]
+	MOVOA  X6, X2               // low copy
+	MOVOA  X7, X3               // high copy
+	PSRLQ  $4, X1               // X1: high input
+	PAND   X8, X0               // X0: low input
+	PAND   X8, X1               // X1: high input
+	PSHUFB X0, X2               // X2: mul low part
+	PSHUFB X1, X3               // X3: mul high part
+	PXOR   X2, X3               // X3: Result
+	PXOR   X4, X3               // X3: Result xor existing out
+	MOVOA  X3, (DX)             // Store
+	ADDQ   $16, SI              // in+=16
+	ADDQ   $16, DX              // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_xor_aligned
+	JMP    done_xor
+
+loopback_xor:
+	MOVOU  (SI), X0     // in[x]
+	MOVOU  (DX), X4     // out[x]
+	MOVOU  X0, X1       // in[x]
+	MOVOU  X6, X2       // low copy
+	MOVOU  X7, X3       // high copy
+	PSRLQ  $4, X1       // X1: high input
+	PAND   X8, X0       // X0: low input
+	PAND   X8, X1       // X1: high input
+	PSHUFB X0, X2       // X2: mul low part
+	PSHUFB X1, X3       // X3: mul high part
+	PXOR   X2, X3       // X3: Result
+	PXOR   X4, X3       // X3: Result xor existing out
+	MOVOU  X3, (DX)     // Store
+	ADDQ   $16, SI      // in+=16
+	ADDQ   $16, DX      // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_xor
+
+done_xor:
+	RET
+
+// func galMulSSSE3(low, high, in, out []byte)
+TEXT ·galMulSSSE3(SB), 7, $0
+	MOVQ   low+0(FP), SI     // SI: &low
+	MOVQ   high+24(FP), DX   // DX: &high
+	MOVOU  (SI), X6          // X6: low
+	MOVOU  (DX), X7          // X7: high
+	MOVQ   $15, BX           // BX: low mask
+	MOVQ   BX, X8
+	PXOR   X5, X5
+	MOVQ   in+48(FP), SI     // SI: &in
+	MOVQ   in_len+56(FP), R9 // R9: len(in)
+	MOVQ   out+72(FP), DX    // DX: &out
+	PSHUFB X5, X8            // X8: lomask (unpacked)
+	MOVQ   SI, AX
+	MOVQ   DX, BX
+	SHRQ   $4, R9            // len(in) / 16
+	ANDQ   $15, AX
+	ANDQ   $15, BX
+	CMPQ   R9, $0
+	JEQ    done
+	ORQ    AX, BX
+	CMPQ   BX, $0
+	JNZ    loopback
+
+loopback_aligned:
+	MOVOA  (SI), X0         // in[x]
+	MOVOA  X0, X1           // in[x]
+	MOVOA  X6, X2           // low copy
+	MOVOA  X7, X3           // high copy
+	PSRLQ  $4, X1           // X1: high input
+	PAND   X8, X0           // X0: low input
+	PAND   X8, X1           // X1: high input
+	PSHUFB X0, X2           // X2: mul low part
+	PSHUFB X1, X3           // X3: mul high part
+	PXOR   X2, X3           // X3: Result
+	MOVOA  X3, (DX)         // Store
+	ADDQ   $16, SI          // in+=16
+	ADDQ   $16, DX          // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_aligned
+	JMP    done
+
+loopback:
+	MOVOU  (SI), X0 // in[x]
+	MOVOU  X0, X1   // in[x]
+	MOVOA  X6, X2   // low copy
+	MOVOA  X7, X3   // high copy
+	PSRLQ  $4, X1   // X1: high input
+	PAND   X8, X0   // X0: low input
+	PAND   X8, X1   // X1: high input
+	PSHUFB X0, X2   // X2: mul low part
+	PSHUFB X1, X3   // X3: mul high part
+	PXOR   X2, X3   // X3: Result
+	MOVOU  X3, (DX) // Store
+	ADDQ   $16, SI  // in+=16
+	ADDQ   $16, DX  // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback
+
+done:
+	RET
+
+// func galMulAVX2Xor(low, high, in, out []byte)
+TEXT ·galMulAVX2Xor(SB), 7, $0
+	MOVQ  low+0(FP), SI     // SI: &low
+	MOVQ  high+24(FP), DX   // DX: &high
+	MOVQ  $15, BX           // BX: low mask
+	MOVQ  BX, X5
+	MOVOU (SI), X6          // X6: low
+	MOVOU (DX), X7          // X7: high
+	MOVQ  in_len+56(FP), R9 // R9: len(in)
+
+	VINSERTI128  $1, X6, Y6, Y6 // low
+	VINSERTI128  $1, X7, Y7, Y7 // high
+	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
+
+	SHRQ  $5, R9         // len(in) / 32
+	MOVQ  out+72(FP), DX // DX: &out
+	MOVQ  in+48(FP), SI  // SI: &in
+	TESTQ R9, R9
+	JZ    done_xor_avx2
+
+loopback_xor_avx2:
+	VMOVDQU (SI), Y0
+	VMOVDQU (DX), Y4
+	VPSRLQ  $4, Y0, Y1 // Y1: high input
+	VPAND   Y8, Y0, Y0 // Y0: low input
+	VPAND   Y8, Y1, Y1 // Y1: high input
+	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+	VPXOR   Y3, Y2, Y3 // Y3: Result
+	VPXOR   Y4, Y3, Y4 // Y4: Result
+	VMOVDQU Y4, (DX)
+
+	ADDQ $32, SI           // in+=32
+	ADDQ $32, DX           // out+=32
+	SUBQ $1, R9
+	JNZ  loopback_xor_avx2
+
+done_xor_avx2:
+	VZEROUPPER
+	RET
+
+// func galMulAVX2(low, high, in, out []byte)
+TEXT ·galMulAVX2(SB), 7, $0
+	MOVQ  low+0(FP), SI     // SI: &low
+	MOVQ  high+24(FP), DX   // DX: &high
+	MOVQ  $15, BX           // BX: low mask
+	MOVQ  BX, X5
+	MOVOU (SI), X6          // X6: low
+	MOVOU (DX), X7          // X7: high
+	MOVQ  in_len+56(FP), R9 // R9: len(in)
+
+	VINSERTI128  $1, X6, Y6, Y6 // low
+	VINSERTI128  $1, X7, Y7, Y7 // high
+	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
+
+	SHRQ  $5, R9         // len(in) / 32
+	MOVQ  out+72(FP), DX // DX: &out
+	MOVQ  in+48(FP), SI  // SI: &in
+	TESTQ R9, R9
+	JZ    done_avx2
+
+loopback_avx2:
+	VMOVDQU (SI), Y0
+	VPSRLQ  $4, Y0, Y1 // Y1: high input
+	VPAND   Y8, Y0, Y0 // Y0: low input
+	VPAND   Y8, Y1, Y1 // Y1: high input
+	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+	VPXOR   Y3, Y2, Y4 // Y4: Result
+	VMOVDQU Y4, (DX)
+
+	ADDQ $32, SI       // in+=32
+	ADDQ $32, DX       // out+=32
+	SUBQ $1, R9
+	JNZ  loopback_avx2
+
+done_avx2:
+	VZEROUPPER
+	RET
+
+// func sSE2XorSlice(in, out []byte)
+TEXT ·sSE2XorSlice(SB), 7, $0
+	MOVQ in+0(FP), SI     // SI: &in
+	MOVQ in_len+8(FP), R9 // R9: len(in)
+	MOVQ out+24(FP), DX   // DX: &out
+	SHRQ $4, R9           // len(in) / 16
+	CMPQ R9, $0
+	JEQ  done_xor_sse2
+
+loopback_xor_sse2:
+	MOVOU (SI), X0          // in[x]
+	MOVOU (DX), X1          // out[x]
+	PXOR  X0, X1
+	MOVOU X1, (DX)
+	ADDQ  $16, SI           // in+=16
+	ADDQ  $16, DX           // out+=16
+	SUBQ  $1, R9
+	JNZ   loopback_xor_sse2
+
+done_xor_sse2:
+	RET

+ 52 - 0
vendor/github.com/klauspost/reedsolomon/galois_arm64.go

@@ -0,0 +1,52 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2017, Minio, Inc.
+
+package reedsolomon
+
+//go:noescape
+func galMulNEON(c uint64, in, out []byte)
+
+//go:noescape
+func galMulXorNEON(c uint64, in, out []byte)
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	var done int
+	galMulNEON(uint64(c), in, out)
+	done = (len(in) >> 5) << 5
+
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] = mt[in[i]]
+		}
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	var done int
+	galMulXorNEON(uint64(c), in, out)
+	done = (len(in) >> 5) << 5
+
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] ^= mt[in[i]]
+		}
+	}
+}
+
+// slice galois add
+func sliceXor(in, out []byte, sse2 bool) {
+	for n, input := range in {
+		out[n] ^= input
+	}
+}
+
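+// codeSomeShardsAvx512 is intentionally a no-op on arm64: the AVX512 path is
+// never selected on this architecture, and the empty method exists only so
+// the shared call sites compile on every platform.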
+func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+}

+ 141 - 0
vendor/github.com/klauspost/reedsolomon/galois_arm64.s

@@ -0,0 +1,141 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2017, Minio, Inc.
+
+// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
+// the opcodes of their Plan9 equivalents
+
+// polynomial multiplication
+#define POLYNOMIAL_MULTIPLICATION \
+	WORD $0x0e3ce340 \ // pmull  v0.8h,v26.8b,v28.8b
+	WORD $0x4e3ce346 \ // pmull2 v6.8h,v26.16b,v28.16b
+	WORD $0x0e3ce36c \ // pmull  v12.8h,v27.8b,v28.8b
+	WORD $0x4e3ce372 // pmull2 v18.8h,v27.16b,v28.16b
+
+// first reduction
+#define FIRST_REDUCTION \
+	WORD $0x0f088402 \ // shrn  v2.8b, v0.8h, #8
+	WORD $0x0f0884c8 \ // shrn  v8.8b, v6.8h, #8
+	WORD $0x0f08858e \ // shrn  v14.8b, v12.8h, #8
+	WORD $0x0f088654 \ // shrn  v20.8b, v18.8h, #8
+	WORD $0x0e22e3c3 \ // pmull v3.8h,v30.8b,v2.8b
+	WORD $0x0e28e3c9 \ // pmull v9.8h,v30.8b,v8.8b
+	WORD $0x0e2ee3cf \ // pmull v15.8h,v30.8b,v14.8b
+	WORD $0x0e34e3d5 \ // pmull v21.8h,v30.8b,v20.8b
+	WORD $0x6e201c60 \ // eor   v0.16b,v3.16b,v0.16b
+	WORD $0x6e261d26 \ // eor   v6.16b,v9.16b,v6.16b
+	WORD $0x6e2c1dec \ // eor   v12.16b,v15.16b,v12.16b
+	WORD $0x6e321eb2 // eor   v18.16b,v21.16b,v18.16b
+
+// second reduction
+#define SECOND_REDUCTION \
+	WORD $0x0f088404 \ // shrn  v4.8b, v0.8h, #8
+	WORD $0x0f0884ca \ // shrn  v10.8b, v6.8h, #8
+	WORD $0x0f088590 \ // shrn  v16.8b, v12.8h, #8
+	WORD $0x0f088656 \ // shrn  v22.8b, v18.8h, #8
+	WORD $0x6e241c44 \ // eor   v4.16b,v2.16b,v4.16b
+	WORD $0x6e2a1d0a \ // eor   v10.16b,v8.16b,v10.16b
+	WORD $0x6e301dd0 \ // eor   v16.16b,v14.16b,v16.16b
+	WORD $0x6e361e96 \ // eor   v22.16b,v20.16b,v22.16b
+	WORD $0x0e24e3c5 \ // pmull v5.8h,v30.8b,v4.8b
+	WORD $0x0e2ae3cb \ // pmull v11.8h,v30.8b,v10.8b
+	WORD $0x0e30e3d1 \ // pmull v17.8h,v30.8b,v16.8b
+	WORD $0x0e36e3d7 \ // pmull v23.8h,v30.8b,v22.8b
+	WORD $0x6e201ca0 \ // eor   v0.16b,v5.16b,v0.16b
+	WORD $0x6e261d61 \ // eor   v1.16b,v11.16b,v6.16b
+	WORD $0x6e2c1e22 \ // eor   v2.16b,v17.16b,v12.16b
+	WORD $0x6e321ee3 // eor   v3.16b,v23.16b,v18.16b
+
+// func galMulNEON(c uint64, in, out []byte)
+TEXT ·galMulNEON(SB), 7, $0
+	MOVD c+0(FP), R0
+	MOVD in_base+8(FP), R1
+	MOVD in_len+16(FP), R2   // length of message
+	MOVD out_base+32(FP), R5
+	SUBS $32, R2
+	BMI  complete
+
+	// Load constants table pointer
+	MOVD $·constants(SB), R3
+
+	// and load constants into v30 & v31
+	WORD $0x4c40a07e // ld1    {v30.16b-v31.16b}, [x3]
+
+	WORD $0x4e010c1c // dup    v28.16b, w0
+
+loop:
+	// Main loop
+	WORD $0x4cdfa83a // ld1   {v26.4s-v27.4s}, [x1], #32
+
+	POLYNOMIAL_MULTIPLICATION
+
+	FIRST_REDUCTION
+
+	SECOND_REDUCTION
+
+	// combine results
+	WORD $0x4e1f2000 // tbl v0.16b,{v0.16b,v1.16b},v31.16b
+	WORD $0x4e1f2041 // tbl v1.16b,{v2.16b,v3.16b},v31.16b
+
+	// Store result
+	WORD $0x4c9faca0 // st1    {v0.2d-v1.2d}, [x5], #32
+
+	SUBS $32, R2
+	BPL  loop
+
+complete:
+	RET
+
+// func galMulXorNEON(c uint64, in, out []byte)
+TEXT ·galMulXorNEON(SB), 7, $0
+	MOVD c+0(FP), R0
+	MOVD in_base+8(FP), R1
+	MOVD in_len+16(FP), R2   // length of message
+	MOVD out_base+32(FP), R5
+	SUBS $32, R2
+	BMI  completeXor
+
+	// Load constants table pointer
+	MOVD $·constants(SB), R3
+
+	// and load constants into v30 & v31
+	WORD $0x4c40a07e // ld1    {v30.16b-v31.16b}, [x3]
+
+	WORD $0x4e010c1c // dup    v28.16b, w0
+
+loopXor:
+	// Main loop
+	WORD $0x4cdfa83a // ld1   {v26.4s-v27.4s}, [x1], #32
+	WORD $0x4c40a8b8 // ld1   {v24.4s-v25.4s}, [x5]
+
+	POLYNOMIAL_MULTIPLICATION
+
+	FIRST_REDUCTION
+
+	SECOND_REDUCTION
+
+	// combine results
+	WORD $0x4e1f2000 // tbl v0.16b,{v0.16b,v1.16b},v31.16b
+	WORD $0x4e1f2041 // tbl v1.16b,{v2.16b,v3.16b},v31.16b
+
+	// Xor result and store
+	WORD $0x6e381c00 // eor v0.16b,v0.16b,v24.16b
+	WORD $0x6e391c21 // eor v1.16b,v1.16b,v25.16b
+	WORD $0x4c9faca0 // st1   {v0.2d-v1.2d}, [x5], #32
+
+	SUBS $32, R2
+	BPL  loopXor
+
+completeXor:
+	RET
+
+// Constants table
+//   generating polynomial is 29 (= 0x1d)
+DATA ·constants+0x0(SB)/8, $0x1d1d1d1d1d1d1d1d
+DATA ·constants+0x8(SB)/8, $0x1d1d1d1d1d1d1d1d
+//   constant for TBL instruction
+DATA ·constants+0x10(SB)/8, $0x0e0c0a0806040200
+DATA ·constants+0x18(SB)/8, $0x1e1c1a1816141210
+
+GLOBL ·constants(SB), 8, $32
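For readers not fluent in the encoded NEON instructions above: each loop iteration multiplies 32 input bytes by the constant coefficient using carry-less multiplies (PMULL/PMULL2) and then folds the 16-bit products back into GF(2^8) with the generating polynomial, whose low byte 0x1d fills the first constant row. A plain-Go sketch of the same multiply-and-reduce idea for a single pair of bytes (reference only, not used by the package):

	// gfMul multiplies a and b in GF(2^8) by shift-and-add, folding any
	// overflow back in with 0x1d, the low byte of the generating polynomial 0x11d.
	func gfMul(a, b byte) byte {
		var p byte
		for i := 0; i < 8; i++ {
			if b&1 != 0 {
				p ^= a
			}
			carry := a & 0x80
			a <<= 1
			if carry != 0 {
				a ^= 0x1d
			}
			b >>= 1
		}
		return p
	}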

+ 34 - 0
vendor/github.com/klauspost/reedsolomon/galois_noasm.go

@@ -0,0 +1,34 @@
+//+build !amd64 noasm appengine gccgo
+//+build !arm64 noasm appengine gccgo
+//+build !ppc64le noasm appengine gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	mt := mulTable[c][:256]
+	out = out[:len(in)]
+	for n, input := range in {
+		out[n] = mt[input]
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	mt := mulTable[c][:256]
+	out = out[:len(in)]
+	for n, input := range in {
+		out[n] ^= mt[input]
+	}
+}
+
+// slice galois add
+func sliceXor(in, out []byte, sse2 bool) {
+	for n, input := range in {
+		out[n] ^= input
+	}
+}
+
+func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	panic("unreachable")
+}

+ 70 - 0
vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go

@@ -0,0 +1,70 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2018, Minio, Inc.
+
+package reedsolomon
+
+//go:noescape
+func galMulPpc(low, high, in, out []byte)
+
+//go:noescape
+func galMulPpcXor(low, high, in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulPpc(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] = low[l] ^ high[h]
+	}
+}
+func galMulPpcXor(low, high, in, out []byte) {
+	for n, input := range in {
+		l := input & 0xf
+		h := input >> 4
+		out[n] ^= low[l] ^ high[h]
+	}
+}
+*/
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+	done := (len(in) >> 4) << 4
+	if done > 0 {
+		galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] = mt[in[i]]
+		}
+	}
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	done := (len(in) >> 4) << 4
+	if done > 0 {
+		galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] ^= mt[in[i]]
+		}
+	}
+}
+
+// slice galois add
+func sliceXor(in, out []byte, sse2 bool) {
+	for n, input := range in {
+		out[n] ^= input
+	}
+}
+
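+// As on arm64, this is a deliberate no-op stub: the AVX512 path is never
+// taken on ppc64le, but the method must be defined for every architecture.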
+func (r reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+}

+ 126 - 0
vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s

@@ -0,0 +1,126 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2018, Minio, Inc.
+
+#include "textflag.h"
+
+#define LOW       R3
+#define HIGH      R4
+#define IN        R5
+#define LEN       R6
+#define OUT       R7
+#define CONSTANTS R8
+#define OFFSET    R9
+#define OFFSET1   R10
+#define OFFSET2   R11
+
+#define X6        VS34
+#define X6_       V2
+#define X7        VS35
+#define X7_       V3
+#define MSG       VS36
+#define MSG_      V4
+#define MSG_HI    VS37
+#define MSG_HI_   V5
+#define RESULT    VS38
+#define RESULT_   V6
+#define ROTATE    VS39
+#define ROTATE_   V7
+#define MASK      VS40
+#define MASK_     V8
+#define FLIP      VS41
+#define FLIP_     V9
+
+
+// func galMulPpc(low, high, in, out []byte)
+TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
+    MOVD    low+0(FP), LOW
+    MOVD    high+24(FP), HIGH
+    MOVD    in+48(FP), IN
+    MOVD    in_len+56(FP), LEN
+    MOVD    out+72(FP), OUT
+
+    MOVD    $16, OFFSET1
+    MOVD    $32, OFFSET2
+
+    MOVD    $·constants(SB), CONSTANTS
+    LXVD2X  (CONSTANTS)(R0), ROTATE
+    LXVD2X  (CONSTANTS)(OFFSET1), MASK
+    LXVD2X  (CONSTANTS)(OFFSET2), FLIP
+
+    LXVD2X  (LOW)(R0), X6
+    LXVD2X  (HIGH)(R0), X7
+    VPERM   X6_, V31, FLIP_, X6_
+    VPERM   X7_, V31, FLIP_, X7_
+
+    MOVD    $0, OFFSET
+
+loop:
+    LXVD2X  (IN)(OFFSET), MSG
+
+    VSRB    MSG_, ROTATE_, MSG_HI_
+    VAND    MSG_, MASK_, MSG_
+    VPERM   X6_, V31, MSG_, MSG_
+    VPERM   X7_, V31, MSG_HI_, MSG_HI_
+
+    VXOR    MSG_, MSG_HI_, MSG_
+
+    STXVD2X MSG, (OUT)(OFFSET)
+
+    ADD     $16, OFFSET, OFFSET
+    CMP     LEN, OFFSET
+    BGT     loop
+    RET
+
+
+// func galMulPpcXor(low, high, in, out []byte)
+TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
+    MOVD    low+0(FP), LOW
+    MOVD    high+24(FP), HIGH
+    MOVD    in+48(FP), IN
+    MOVD    in_len+56(FP), LEN
+    MOVD    out+72(FP), OUT
+
+    MOVD    $16, OFFSET1
+    MOVD    $32, OFFSET2
+
+    MOVD    $·constants(SB), CONSTANTS
+    LXVD2X  (CONSTANTS)(R0), ROTATE
+    LXVD2X  (CONSTANTS)(OFFSET1), MASK
+    LXVD2X  (CONSTANTS)(OFFSET2), FLIP
+
+    LXVD2X  (LOW)(R0), X6
+    LXVD2X  (HIGH)(R0), X7
+    VPERM   X6_, V31, FLIP_, X6_
+    VPERM   X7_, V31, FLIP_, X7_
+
+    MOVD    $0, OFFSET
+
+loopXor:
+    LXVD2X  (IN)(OFFSET), MSG
+    LXVD2X  (OUT)(OFFSET), RESULT
+
+    VSRB    MSG_, ROTATE_, MSG_HI_
+    VAND    MSG_, MASK_, MSG_
+    VPERM   X6_, V31, MSG_, MSG_
+    VPERM   X7_, V31, MSG_HI_, MSG_HI_
+
+    VXOR    MSG_, MSG_HI_, MSG_
+    VXOR    MSG_, RESULT_, RESULT_
+
+    STXVD2X RESULT, (OUT)(OFFSET)
+
+    ADD     $16, OFFSET, OFFSET
+    CMP     LEN, OFFSET
+    BGT     loopXor
+    RET
+
+DATA ·constants+0x0(SB)/8, $0x0404040404040404
+DATA ·constants+0x8(SB)/8, $0x0404040404040404
+DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA ·constants+0x20(SB)/8, $0x0706050403020100
+DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
+
+GLOBL ·constants(SB), 8, $48

+ 132 - 0
vendor/github.com/klauspost/reedsolomon/gentables.go

@@ -0,0 +1,132 @@
+//+build ignore
+
+package main
+
+import (
+	"fmt"
+)
+
+var logTable = [fieldSize]int16{
+	-1, 0, 1, 25, 2, 50, 26, 198,
+	3, 223, 51, 238, 27, 104, 199, 75,
+	4, 100, 224, 14, 52, 141, 239, 129,
+	28, 193, 105, 248, 200, 8, 76, 113,
+	5, 138, 101, 47, 225, 36, 15, 33,
+	53, 147, 142, 218, 240, 18, 130, 69,
+	29, 181, 194, 125, 106, 39, 249, 185,
+	201, 154, 9, 120, 77, 228, 114, 166,
+	6, 191, 139, 98, 102, 221, 48, 253,
+	226, 152, 37, 179, 16, 145, 34, 136,
+	54, 208, 148, 206, 143, 150, 219, 189,
+	241, 210, 19, 92, 131, 56, 70, 64,
+	30, 66, 182, 163, 195, 72, 126, 110,
+	107, 58, 40, 84, 250, 133, 186, 61,
+	202, 94, 155, 159, 10, 21, 121, 43,
+	78, 212, 229, 172, 115, 243, 167, 87,
+	7, 112, 192, 247, 140, 128, 99, 13,
+	103, 74, 222, 237, 49, 197, 254, 24,
+	227, 165, 153, 119, 38, 184, 180, 124,
+	17, 68, 146, 217, 35, 32, 137, 46,
+	55, 63, 209, 91, 149, 188, 207, 205,
+	144, 135, 151, 178, 220, 252, 190, 97,
+	242, 86, 211, 171, 20, 42, 93, 158,
+	132, 60, 57, 83, 71, 109, 65, 162,
+	31, 45, 67, 216, 183, 123, 164, 118,
+	196, 23, 73, 236, 127, 12, 111, 246,
+	108, 161, 59, 82, 41, 157, 85, 170,
+	251, 96, 134, 177, 187, 204, 62, 90,
+	203, 89, 95, 176, 156, 169, 160, 81,
+	11, 245, 22, 235, 122, 117, 44, 215,
+	79, 174, 213, 233, 230, 231, 173, 232,
+	116, 214, 244, 234, 168, 80, 88, 175,
+}
+
+const (
+	// The number of elements in the field.
+	fieldSize = 256
+
+	// The polynomial used to generate the logarithm table.
+	//
+	// There are a number of polynomials that work to generate
+	// a Galois field of 256 elements.  The choice is arbitrary,
+	// and we just use the first one.
+	//
+	// The possibilities are: 29, 43, 45, 77, 95, 99, 101, 105,
+	// 113, 135, 141, 169, 195, 207, 231, and 245.
+	generatingPolynomial = 29
+)
+
+func main() {
+	t := generateExpTable()
+	fmt.Printf("var expTable = %#v\n", t)
+	//t2 := generateMulTableSplit(t)
+	//fmt.Printf("var mulTable = %#v\n", t2)
+	low, high := generateMulTableHalf(t)
+	fmt.Printf("var mulTableLow = %#v\n", low)
+	fmt.Printf("var mulTableHigh = %#v\n", high)
+}
+
+/**
+ * Generates the inverse log table.
+ */
+func generateExpTable() []byte {
+	result := make([]byte, fieldSize*2-2)
+	for i := 1; i < fieldSize; i++ {
+		log := logTable[i]
+		result[log] = byte(i)
+		result[log+fieldSize-1] = byte(i)
+	}
+	return result
+}
+
+func generateMulTable(expTable []byte) []byte {
+	result := make([]byte, 256*256)
+	for v := range result {
+		a := byte(v & 0xff)
+		b := byte(v >> 8)
+		if a == 0 || b == 0 {
+			result[v] = 0
+			continue
+		}
+		logA := int(logTable[a])
+		logB := int(logTable[b])
+		result[v] = expTable[logA+logB]
+	}
+	return result
+}
+
+func generateMulTableSplit(expTable []byte) [256][256]byte {
+	var result [256][256]byte
+	for a := range result {
+		for b := range result[a] {
+			if a == 0 || b == 0 {
+				result[a][b] = 0
+				continue
+			}
+			logA := int(logTable[a])
+			logB := int(logTable[b])
+			result[a][b] = expTable[logA+logB]
+		}
+	}
+	return result
+}
+
+func generateMulTableHalf(expTable []byte) (low [256][16]byte, high [256][16]byte) {
+	for a := range low {
+		for b := range low {
+			result := 0
+			if !(a == 0 || b == 0) {
+				logA := int(logTable[a])
+				logB := int(logTable[b])
+				result = int(expTable[logA+logB])
+			}
+			if (b & 0xf) == b {
+				low[a][b] = byte(result)
+			}
+			if (b & 0xf0) == b {
+				high[a][b>>4] = byte(result)
+			}
+		}
+	}
+	return
+}
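
The tables printed by this generator drive the multiplication used in the rest of the package: two non-zero field elements multiply as expTable[logTable[a]+logTable[b]], and generateMulTableHalf further splits the products into per-nibble halves for the SIMD lookups. A small sketch of the log/exp product, assuming tables of the shapes generated here (gfMul is a hypothetical helper, not part of the file):

    // gfMul multiplies two GF(256) elements via the log/exp tables
    // produced by this generator. Illustrative sketch only.
    func gfMul(logTable []int16, expTable []byte, a, b byte) byte {
        if a == 0 || b == 0 {
            return 0
        }
        return expTable[int(logTable[a])+int(logTable[b])]
    }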

+ 160 - 0
vendor/github.com/klauspost/reedsolomon/inversion_tree.go

@@ -0,0 +1,160 @@
+/**
+ * A thread-safe tree which caches inverted matrices.
+ *
+ * Copyright 2016, Peter Collins
+ */
+
+package reedsolomon
+
+import (
+	"errors"
+	"sync"
+)
+
+// The tree uses a Reader-Writer mutex to make it thread-safe
+// when accessing cached matrices and inserting new ones.
+type inversionTree struct {
+	mutex *sync.RWMutex
+	root  inversionNode
+}
+
+type inversionNode struct {
+	matrix   matrix
+	children []*inversionNode
+}
+
+// newInversionTree initializes a tree for storing inverted matrices.
+// Note that the root node is the identity matrix as it implies
+// there were no errors with the original data.
+func newInversionTree(dataShards, parityShards int) inversionTree {
+	identity, _ := identityMatrix(dataShards)
+	root := inversionNode{
+		matrix:   identity,
+		children: make([]*inversionNode, dataShards+parityShards),
+	}
+	return inversionTree{
+		mutex: &sync.RWMutex{},
+		root:  root,
+	}
+}
+
+// GetInvertedMatrix returns the cached inverted matrix or nil if it
+// is not found in the tree keyed on the indices of invalid rows.
+func (t inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
+	// Lock the tree for reading before accessing the tree.
+	t.mutex.RLock()
+	defer t.mutex.RUnlock()
+
+	// If no invalid indices were given we should return the root
+	// identity matrix.
+	if len(invalidIndices) == 0 {
+		return t.root.matrix
+	}
+
+	// Recursively search for the inverted matrix in the tree, passing in
+	// 0 as the parent index as we start at the root of the tree.
+	return t.root.getInvertedMatrix(invalidIndices, 0)
+}
+
+// errAlreadySet is returned if the root node matrix is overwritten
+var errAlreadySet = errors.New("the root node identity matrix is already set")
+
+// InsertInvertedMatrix inserts a new inverted matrix into the tree
+// keyed by the indices of invalid rows.  The total number of shards
+// is required for creating the proper length lists of child nodes for
+// each node.
+func (t inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
+	// If no invalid indices were given then we are done because the
+	// root node is already set with the identity matrix.
+	if len(invalidIndices) == 0 {
+		return errAlreadySet
+	}
+
+	if !matrix.IsSquare() {
+		return errNotSquare
+	}
+
+	// Lock the tree for writing and reading before accessing the tree.
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
+
+	// Recursively create nodes for the inverted matrix in the tree until
+	// we reach the node to insert the matrix to.  We start by passing in
+	// 0 as the parent index as we start at the root of the tree.
+	t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
+
+	return nil
+}
+
+func (n inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
+	// Get the child node to search next from the list of children.  The
+	// list of children starts relative to the parent index passed in
+	// because the indices of invalid rows is sorted (by default).  As we
+	// search recursively, the first invalid index gets popped off the list,
+	// so when searching through the list of children, use that first invalid
+	// index to find the child node.
+	firstIndex := invalidIndices[0]
+	node := n.children[firstIndex-parent]
+
+	// If the child node doesn't exist in the list yet, fail fast by
+	// returning, so we can construct and insert the proper inverted matrix.
+	if node == nil {
+		return nil
+	}
+
+	// If there's more than one invalid index left in the list we should
+	// keep searching recursively.
+	if len(invalidIndices) > 1 {
+		// Search recursively on the child node by passing in the invalid indices
+		// with the first index popped off the front.  Also the parent index to
+		// pass down is the first index plus one.
+		return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1)
+	}
+	// If there aren't any more invalid indices to search, we've found our
+	// node.  Return it, however keep in mind that the matrix could still be
+	// nil because intermediary nodes in the tree are created sometimes with
+	// their inversion matrices uninitialized.
+	return node.matrix
+}
+
+func (n inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
+	// As above, get the child node to search next from the list of children.
+	// The list of children starts relative to the parent index passed in
+	// because the indices of invalid rows is sorted (by default).  As we
+	// search recursively, the first invalid index gets popped off the list,
+	// so when searching through the list of children, use that first invalid
+	// index to find the child node.
+	firstIndex := invalidIndices[0]
+	node := n.children[firstIndex-parent]
+
+	// If the child node doesn't exist in the list yet, create a new
+	// node because we have the writer lock and add it to the list
+	// of children.
+	if node == nil {
+		// Make the length of the list of children equal to the number
+		// of shards minus the first invalid index because the list of
+		// invalid indices is sorted, so only this length of errors
+		// are possible in the tree.
+		node = &inversionNode{
+			children: make([]*inversionNode, shards-firstIndex),
+		}
+		// Insert the new node into the tree at the first index relative
+		// to the parent index that was given in this recursive call.
+		n.children[firstIndex-parent] = node
+	}
+
+	// If there's more than one invalid index left in the list we should
+	// keep searching recursively in order to find the node to add our
+	// matrix.
+	if len(invalidIndices) > 1 {
+		// As above, search recursively on the child node by passing in
+		// the invalid indices with the first index popped off the front.
+		// Also the total number of shards and parent index are passed down
+		// which is equal to the first index plus one.
+		node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1)
+	} else {
+		// If there aren't any more invalid indices to search, we've found our
+		// node.  Cache the inverted matrix in this node.
+		node.matrix = matrix
+	}
+}
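
In practice the tree acts as a lazily filled cache: the inverted matrix is looked up by the sorted list of missing row indices, and inserted after being computed on a miss. A sketch of that flow from inside the package (cachedInvert is a hypothetical helper; the real caller is reconstruct in reedsolomon.go):

    // cachedInvert returns the cached inverse for the given missing rows,
    // computing and storing it on a cache miss. Illustrative sketch only.
    func cachedInvert(t inversionTree, sub matrix, invalidIndices []int, shards int) (matrix, error) {
        if m := t.GetInvertedMatrix(invalidIndices); m != nil {
            return m, nil // cache hit
        }
        inv, err := sub.Invert()
        if err != nil {
            return nil, err // e.g. the sub-matrix is singular
        }
        if err := t.InsertInvertedMatrix(invalidIndices, inv, shards); err != nil {
            return nil, err
        }
        return inv, nil
    }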

+ 279 - 0
vendor/github.com/klauspost/reedsolomon/matrix.go

@@ -0,0 +1,279 @@
+/**
+ * Matrix Algebra over an 8-bit Galois Field
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+package reedsolomon
+
+import (
+	"errors"
+	"fmt"
+	"strconv"
+	"strings"
+)
+
+// byte[row][col]
+type matrix [][]byte
+
+// newMatrix returns a matrix of zeros.
+func newMatrix(rows, cols int) (matrix, error) {
+	if rows <= 0 {
+		return nil, errInvalidRowSize
+	}
+	if cols <= 0 {
+		return nil, errInvalidColSize
+	}
+
+	m := matrix(make([][]byte, rows))
+	for i := range m {
+		m[i] = make([]byte, cols)
+	}
+	return m, nil
+}
+
+// newMatrixData initializes a matrix with the given row-major data.
+// Note that data is not copied from input.
+func newMatrixData(data [][]byte) (matrix, error) {
+	m := matrix(data)
+	err := m.Check()
+	if err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// identityMatrix returns an identity matrix of the given size.
+func identityMatrix(size int) (matrix, error) {
+	m, err := newMatrix(size, size)
+	if err != nil {
+		return nil, err
+	}
+	for i := range m {
+		m[i][i] = 1
+	}
+	return m, nil
+}
+
+// errInvalidRowSize will be returned if attempting to create a matrix with negative or zero row number.
+var errInvalidRowSize = errors.New("invalid row size")
+
+// errInvalidColSize will be returned if attempting to create a matrix with negative or zero column number.
+var errInvalidColSize = errors.New("invalid column size")
+
+// errColSizeMismatch is returned if the size of matrix columns mismatch.
+var errColSizeMismatch = errors.New("column size is not the same for all rows")
+
+func (m matrix) Check() error {
+	rows := len(m)
+	if rows <= 0 {
+		return errInvalidRowSize
+	}
+	cols := len(m[0])
+	if cols <= 0 {
+		return errInvalidColSize
+	}
+
+	for _, col := range m {
+		if len(col) != cols {
+			return errColSizeMismatch
+		}
+	}
+	return nil
+}
+
+// String returns a human-readable string of the matrix contents.
+//
+// Example: [[1, 2], [3, 4]]
+func (m matrix) String() string {
+	rowOut := make([]string, 0, len(m))
+	for _, row := range m {
+		colOut := make([]string, 0, len(row))
+		for _, col := range row {
+			colOut = append(colOut, strconv.Itoa(int(col)))
+		}
+		rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]")
+	}
+	return "[" + strings.Join(rowOut, ", ") + "]"
+}
+
+// Multiply multiplies this matrix (the one on the left) by another
+// matrix (the one on the right) and returns a new matrix with the result.
+func (m matrix) Multiply(right matrix) (matrix, error) {
+	if len(m[0]) != len(right) {
+		return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", len(m[0]), len(right))
+	}
+	result, _ := newMatrix(len(m), len(right[0]))
+	for r, row := range result {
+		for c := range row {
+			var value byte
+			for i := range m[0] {
+				value ^= galMultiply(m[r][i], right[i][c])
+			}
+			result[r][c] = value
+		}
+	}
+	return result, nil
+}
+
+// Augment returns the concatenation of this matrix and the matrix on the right.
+func (m matrix) Augment(right matrix) (matrix, error) {
+	if len(m) != len(right) {
+		return nil, errMatrixSize
+	}
+
+	result, _ := newMatrix(len(m), len(m[0])+len(right[0]))
+	for r, row := range m {
+		for c := range row {
+			result[r][c] = m[r][c]
+		}
+		cols := len(m[0])
+		for c := range right[0] {
+			result[r][cols+c] = right[r][c]
+		}
+	}
+	return result, nil
+}
+
+// errMatrixSize is returned if matrix dimensions do not match.
+var errMatrixSize = errors.New("matrix sizes do not match")
+
+func (m matrix) SameSize(n matrix) error {
+	if len(m) != len(n) {
+		return errMatrixSize
+	}
+	for i := range m {
+		if len(m[i]) != len(n[i]) {
+			return errMatrixSize
+		}
+	}
+	return nil
+}
+
+// SubMatrix returns a part of this matrix. Data is copied.
+func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
+	result, err := newMatrix(rmax-rmin, cmax-cmin)
+	if err != nil {
+		return nil, err
+	}
+	// OPTME: If used heavily, use copy function to copy slice
+	for r := rmin; r < rmax; r++ {
+		for c := cmin; c < cmax; c++ {
+			result[r-rmin][c-cmin] = m[r][c]
+		}
+	}
+	return result, nil
+}
+
+// SwapRows exchanges two rows in the matrix.
+func (m matrix) SwapRows(r1, r2 int) error {
+	if r1 < 0 || len(m) <= r1 || r2 < 0 || len(m) <= r2 {
+		return errInvalidRowSize
+	}
+	m[r2], m[r1] = m[r1], m[r2]
+	return nil
+}
+
+// IsSquare will return true if the matrix is square,
+// and false otherwise.
+func (m matrix) IsSquare() bool {
+	return len(m) == len(m[0])
+}
+
+// errSingular is returned if the matrix is singular and cannot be inverted
+var errSingular = errors.New("matrix is singular")
+
+// errNotSquare is returned if attempting to invert a non-square matrix.
+var errNotSquare = errors.New("only square matrices can be inverted")
+
+// Invert returns the inverse of this matrix.
+// Returns errSingular when the matrix is singular and doesn't have an inverse.
+// The matrix must be square, otherwise errNotSquare is returned.
+func (m matrix) Invert() (matrix, error) {
+	if !m.IsSquare() {
+		return nil, errNotSquare
+	}
+
+	size := len(m)
+	work, _ := identityMatrix(size)
+	work, _ = m.Augment(work)
+
+	err := work.gaussianElimination()
+	if err != nil {
+		return nil, err
+	}
+
+	return work.SubMatrix(0, size, size, size*2)
+}
+
+func (m matrix) gaussianElimination() error {
+	rows := len(m)
+	columns := len(m[0])
+	// Clear out the part below the main diagonal and scale the main
+	// diagonal to be 1.
+	for r := 0; r < rows; r++ {
+		// If the element on the diagonal is 0, find a row below
+		// that has a non-zero and swap them.
+		if m[r][r] == 0 {
+			for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
+				if m[rowBelow][r] != 0 {
+					m.SwapRows(r, rowBelow)
+					break
+				}
+			}
+		}
+		// If we couldn't find one, the matrix is singular.
+		if m[r][r] == 0 {
+			return errSingular
+		}
+		// Scale to 1.
+		if m[r][r] != 1 {
+			scale := galDivide(1, m[r][r])
+			for c := 0; c < columns; c++ {
+				m[r][c] = galMultiply(m[r][c], scale)
+			}
+		}
+		// Make everything below the 1 be a 0 by subtracting
+		// a multiple of it.  (Subtraction and addition are
+		// both exclusive or in the Galois field.)
+		for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
+			if m[rowBelow][r] != 0 {
+				scale := m[rowBelow][r]
+				for c := 0; c < columns; c++ {
+					m[rowBelow][c] ^= galMultiply(scale, m[r][c])
+				}
+			}
+		}
+	}
+
+	// Now clear the part above the main diagonal.
+	for d := 0; d < rows; d++ {
+		for rowAbove := 0; rowAbove < d; rowAbove++ {
+			if m[rowAbove][d] != 0 {
+				scale := m[rowAbove][d]
+				for c := 0; c < columns; c++ {
+					m[rowAbove][c] ^= galMultiply(scale, m[d][c])
+				}
+
+			}
+		}
+	}
+	return nil
+}
+
+// Create a Vandermonde matrix, which is guaranteed to have the
+// property that any subset of rows that forms a square matrix
+// is invertible.
+func vandermonde(rows, cols int) (matrix, error) {
+	result, err := newMatrix(rows, cols)
+	if err != nil {
+		return nil, err
+	}
+	for r, row := range result {
+		for c := range row {
+			result[r][c] = galExp(byte(r), c)
+		}
+	}
+	return result, nil
+}
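
A quick way to see how these pieces fit together is to take the top square of a Vandermonde matrix (as buildMatrix in reedsolomon.go does), invert it, and check that the product with the original is the identity. A sketch from inside the package (invertTopSquare is a hypothetical helper):

    // invertTopSquare checks that the top square of a Vandermonde matrix
    // inverts cleanly: top * inverse(top) must equal the identity matrix.
    // Illustrative sketch only; assumes dataShards > 0.
    func invertTopSquare(dataShards, totalShards int) error {
        vm, err := vandermonde(totalShards, dataShards)
        if err != nil {
            return err
        }
        top, err := vm.SubMatrix(0, 0, dataShards, dataShards)
        if err != nil {
            return err
        }
        inv, err := top.Invert()
        if err != nil {
            return err // errSingular or errNotSquare
        }
        prod, err := top.Multiply(inv)
        if err != nil {
            return err
        }
        ident, _ := identityMatrix(dataShards)
        if prod.String() != ident.String() {
            return errors.New("inversion check failed")
        }
        return nil
    }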

+ 118 - 0
vendor/github.com/klauspost/reedsolomon/options.go

@@ -0,0 +1,118 @@
+package reedsolomon
+
+import (
+	"runtime"
+
+	"github.com/klauspost/cpuid"
+)
+
+// Option allows overriding processing parameters.
+type Option func(*options)
+
+type options struct {
+	maxGoroutines                         int
+	minSplitSize                          int
+	useAVX512, useAVX2, useSSSE3, useSSE2 bool
+	usePAR1Matrix                         bool
+	useCauchy                             bool
+	shardSize                             int
+}
+
+var defaultOptions = options{
+	maxGoroutines: 384,
+	minSplitSize:  1024,
+}
+
+func init() {
+	if runtime.GOMAXPROCS(0) <= 1 {
+		defaultOptions.maxGoroutines = 1
+	}
+	// Detect CPU capabilities.
+	defaultOptions.useSSSE3 = cpuid.CPU.SSSE3()
+	defaultOptions.useSSE2 = cpuid.CPU.SSE2()
+	defaultOptions.useAVX2 = cpuid.CPU.AVX2()
+	defaultOptions.useAVX512 = cpuid.CPU.AVX512F() && cpuid.CPU.AVX512BW()
+}
+
+// WithMaxGoroutines sets the maximum number of goroutines to use for encoding & decoding.
+// Jobs will be split into this many parts, unless each goroutine would have to process
+// less than minSplitSize bytes (set with WithMinSplitSize).
+// For the best speed, keep this well above the GOMAXPROCS number for more fine grained
+// scheduling.
+// If n <= 0, it is ignored.
+func WithMaxGoroutines(n int) Option {
+	return func(o *options) {
+		if n > 0 {
+			o.maxGoroutines = n
+		}
+	}
+}
+
+// WithAutoGoroutines will adjust the number of goroutines for optimal speed with a
+// specific shard size.
+// Pass in the shard size you expect to use. Other shard sizes will work, but may not
+// run at the optimal speed.
+// This option overrides WithMaxGoroutines.
+// If shardSize <= 0, it is ignored.
+func WithAutoGoroutines(shardSize int) Option {
+	return func(o *options) {
+		o.shardSize = shardSize
+	}
+}
+
+// WithMinSplitSize sets the minimum encoding size in bytes per goroutine.
+// See WithMaxGoroutines on how jobs are split.
+// If n <= 0, it is ignored.
+func WithMinSplitSize(n int) Option {
+	return func(o *options) {
+		if n > 0 {
+			o.minSplitSize = n
+		}
+	}
+}
+
+func withSSE3(enabled bool) Option {
+	return func(o *options) {
+		o.useSSSE3 = enabled
+	}
+}
+
+func withAVX2(enabled bool) Option {
+	return func(o *options) {
+		o.useAVX2 = enabled
+	}
+}
+
+func withSSE2(enabled bool) Option {
+	return func(o *options) {
+		o.useSSE2 = enabled
+	}
+}
+
+func withAVX512(enabled bool) Option {
+	return func(o *options) {
+		o.useAVX512 = enabled
+	}
+}
+
+// WithPAR1Matrix causes the encoder to build the matrix the way PARv1
+// does. Note that the method they use is buggy, and may lead to cases
+// where recovery is impossible, even if there are enough parity
+// shards.
+func WithPAR1Matrix() Option {
+	return func(o *options) {
+		o.usePAR1Matrix = true
+		o.useCauchy = false
+	}
+}
+
+// WithCauchyMatrix will make the encoder build a Cauchy style matrix.
+// The output of this is not compatible with the standard output.
+// A Cauchy matrix is faster to generate. This does not affect data throughput,
+// but will result in slightly faster start-up time.
+func WithCauchyMatrix() Option {
+	return func(o *options) {
+		o.useCauchy = true
+		o.usePAR1Matrix = false
+	}
+}
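
From the caller's side these options are simply passed to New (or NewStream). A short usage sketch with illustrative values, not recommendations:

    package main

    import (
        "log"

        "github.com/klauspost/reedsolomon"
    )

    func main() {
        // 10 data + 3 parity shards, tuned for roughly 64 KiB shards.
        enc, err := reedsolomon.New(10, 3,
            reedsolomon.WithAutoGoroutines(64<<10),
            reedsolomon.WithMinSplitSize(4096),
        )
        if err != nil {
            log.Fatal(err)
        }
        _ = enc
    }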

+ 887 - 0
vendor/github.com/klauspost/reedsolomon/reedsolomon.go

@@ -0,0 +1,887 @@
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+// Package reedsolomon enables Erasure Coding in Go
+//
+// For usage and examples, see https://github.com/klauspost/reedsolomon
+//
+package reedsolomon
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"runtime"
+	"sync"
+
+	"github.com/klauspost/cpuid"
+)
+
+// Encoder is an interface to encode Reed-Solomon parity sets for your data.
+type Encoder interface {
+	// Encode parity for a set of data shards.
+	// Input is 'shards' containing data shards followed by parity shards.
+	// The number of shards must match the number given to New().
+	// Each shard is a byte array, and they must all be the same size.
+	// The parity shards will always be overwritten and the data shards
+	// will remain the same, so it is safe for you to read from the
+	// data shards while this is running.
+	Encode(shards [][]byte) error
+
+	// Verify returns true if the parity shards contain correct data.
+	// The data is the same format as Encode. No data is modified, so
+	// you are allowed to read from data while this is running.
+	Verify(shards [][]byte) (bool, error)
+
+	// Reconstruct will recreate the missing shards if possible.
+	//
+	// Given a list of shards, some of which contain data, fills in the
+	// ones that don't have data.
+	//
+	// The length of the array must be equal to the total number of shards.
+	// You indicate that a shard is missing by setting it to nil or zero-length.
+	// If a shard is zero-length but has sufficient capacity, that memory will
+	// be used, otherwise a new []byte will be allocated.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// The reconstructed shard set is complete, but integrity is not verified.
+	// Use the Verify function to check if data set is ok.
+	Reconstruct(shards [][]byte) error
+
+	// ReconstructData will recreate any missing data shards, if possible.
+	//
+	// Given a list of shards, some of which contain data, fills in the
+	// data shards that don't have data.
+	//
+	// The length of the array must be equal to Shards.
+	// You indicate that a shard is missing by setting it to nil or zero-length.
+	// If a shard is zero-length but has sufficient capacity, that memory will
+	// be used, otherwise a new []byte will be allocated.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// As the reconstructed shard set may contain missing parity shards,
+	// calling the Verify function is likely to fail.
+	ReconstructData(shards [][]byte) error
+
+	// Update is used when only a few data shards have changed, to recompute their parity.
+	// Input 'newDatashards' contains the changed data shards.
+	// Input 'shards' contains the old data shards (an entry may be nil if that data shard is unchanged) and the old parity shards.
+	// The new parity shards are written to shards[DataShards:].
+	// Update is most useful when DataShards is much larger than ParityShards and only a few data shards have changed;
+	// it is then faster than Encode and does not need to read all data shards.
+	Update(shards [][]byte, newDatashards [][]byte) error
+
+	// Split a data slice into the number of shards given to the encoder,
+	// and create empty parity shards.
+	//
+	// The data will be split into equally sized shards.
+	// If the data size isn't divisible by the number of shards,
+	// the last shard will contain extra zeros.
+	//
+	// There must be at least 1 byte otherwise ErrShortData will be
+	// returned.
+	//
+	// The data will not be copied, except for the last shard, so you
+	// should not modify the data of the input slice afterwards.
+	Split(data []byte) ([][]byte, error)
+
+	// Join the shards and write the data segment to dst.
+	//
+	// Only the data shards are considered.
+	// You must supply the exact output size you want.
+	// If there are too few shards given, ErrTooFewShards will be returned.
+	// If the total data size is less than outSize, ErrShortData will be returned.
+	Join(dst io.Writer, shards [][]byte, outSize int) error
+}
+
+// reedSolomon contains a matrix for a specific
+// distribution of data shards and parity shards.
+// Construct it using New().
+type reedSolomon struct {
+	DataShards   int // Number of data shards, should not be modified.
+	ParityShards int // Number of parity shards, should not be modified.
+	Shards       int // Total number of shards. Calculated, and should not be modified.
+	m            matrix
+	tree         inversionTree
+	parity       [][]byte
+	o            options
+}
+
+// ErrInvShardNum will be returned by New, if you attempt to create
+// an Encoder where either data or parity shards is zero or less.
+var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/parity shards")
+
+// ErrMaxShardNum will be returned by New, if you attempt to create an
+// Encoder where data and parity shards are bigger than the order of
+// GF(2^8).
+var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")
+
+// buildMatrix creates the matrix to use for encoding, given the
+// number of data shards and the number of total shards.
+//
+// The top square of the matrix is guaranteed to be an identity
+// matrix, which means that the data shards are unchanged after
+// encoding.
+func buildMatrix(dataShards, totalShards int) (matrix, error) {
+	// Start with a Vandermonde matrix.  This matrix would work,
+	// in theory, but doesn't have the property that the data
+	// shards are unchanged after encoding.
+	vm, err := vandermonde(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	// Multiply by the inverse of the top square of the matrix.
+	// This will make the top square be the identity matrix, but
+	// preserve the property that any square subset of rows is
+	// invertible.
+	top, err := vm.SubMatrix(0, 0, dataShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	topInv, err := top.Invert()
+	if err != nil {
+		return nil, err
+	}
+
+	return vm.Multiply(topInv)
+}
+
+// buildMatrixPAR1 creates the matrix to use for encoding according to
+// the PARv1 spec, given the number of data shards and the number of
+// total shards. Note that the method they use is buggy, and may lead
+// to cases where recovery is impossible, even if there are enough
+// parity shards.
+//
+// The top square of the matrix is guaranteed to be an identity
+// matrix, which means that the data shards are unchanged after
+// encoding.
+func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) {
+	result, err := newMatrix(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	for r, row := range result {
+		// The top portion of the matrix is the identity
+		// matrix, and the bottom is a transposed Vandermonde
+		// matrix starting at 1 instead of 0.
+		if r < dataShards {
+			result[r][r] = 1
+		} else {
+			for c := range row {
+				result[r][c] = galExp(byte(c+1), r-dataShards)
+			}
+		}
+	}
+	return result, nil
+}
+
+func buildMatrixCauchy(dataShards, totalShards int) (matrix, error) {
+	result, err := newMatrix(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	for r, row := range result {
+		// The top portion of the matrix is the identity
+		// matrix, and the bottom is a transposed Cauchy matrix.
+		if r < dataShards {
+			result[r][r] = 1
+		} else {
+			for c := range row {
+				result[r][c] = invTable[(byte(r ^ c))]
+			}
+		}
+	}
+	return result, nil
+}
+
+// New creates a new encoder and initializes it to
+// the number of data shards and parity shards that
+// you want to use. You can reuse this encoder.
+// Note that the maximum number of total shards is 256.
+// If no options are supplied, default options are used.
+func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
+	r := reedSolomon{
+		DataShards:   dataShards,
+		ParityShards: parityShards,
+		Shards:       dataShards + parityShards,
+		o:            defaultOptions,
+	}
+
+	for _, opt := range opts {
+		opt(&r.o)
+	}
+	if dataShards <= 0 || parityShards <= 0 {
+		return nil, ErrInvShardNum
+	}
+
+	if dataShards+parityShards > 256 {
+		return nil, ErrMaxShardNum
+	}
+
+	var err error
+	switch {
+	case r.o.useCauchy:
+		r.m, err = buildMatrixCauchy(dataShards, r.Shards)
+	case r.o.usePAR1Matrix:
+		r.m, err = buildMatrixPAR1(dataShards, r.Shards)
+	default:
+		r.m, err = buildMatrix(dataShards, r.Shards)
+	}
+	if err != nil {
+		return nil, err
+	}
+	if r.o.shardSize > 0 {
+		cacheSize := cpuid.CPU.Cache.L2
+		if cacheSize <= 0 {
+			// Set to 128K if undetectable.
+			cacheSize = 128 << 10
+		}
+		p := runtime.NumCPU()
+
+		// 1 input + parity must fit in cache, and we add one more to be safer.
+		shards := 1 + parityShards
+		g := (r.o.shardSize * shards) / (cacheSize - (cacheSize >> 4))
+
+		if cpuid.CPU.ThreadsPerCore > 1 {
+			// If multiple threads per core, make sure they don't contend for cache.
+			g *= cpuid.CPU.ThreadsPerCore
+		}
+		g *= 2
+		if g < p {
+			g = p
+		}
+
+		// Have g be multiple of p
+		g += p - 1
+		g -= g % p
+
+		r.o.maxGoroutines = g
+	}
+
+	// Inverted matrices are cached in a tree keyed by the indices
+	// of the invalid rows of the data to reconstruct.
+	// The inversion root node will have the identity matrix as
+	// its inversion matrix because it implies there are no errors
+	// with the original data.
+	r.tree = newInversionTree(dataShards, parityShards)
+
+	r.parity = make([][]byte, parityShards)
+	for i := range r.parity {
+		r.parity[i] = r.m[dataShards+i]
+	}
+
+	return &r, err
+}
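
As a worked illustration of the shardSize branch above (all numbers illustrative): with shardSize = 64 KiB, 2 parity shards, a 128 KiB L2 cache, 2 threads per core and 8 CPUs, g starts at (65536*3)/(131072-8192) = 1, becomes 2 after the threads-per-core adjustment and 4 after the extra doubling, is raised to 8 to match NumCPU, and the final rounding to a multiple of NumCPU leaves maxGoroutines at 8.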
+
+// ErrTooFewShards is returned if too few shards were given to
+// Encode/Verify/Reconstruct/Update. It will also be returned from Reconstruct
+// if there were too few shards to reconstruct the missing data.
+var ErrTooFewShards = errors.New("too few shards given")
+
+// Encodes parity for a set of data shards.
+// Input is an array 'shards' containing data shards followed by parity shards.
+// The number of shards must match the number given to New.
+// Each shard is a byte array, and they must all be the same size.
+// The parity shards will always be overwritten and the data shards
+// will remain the same.
+func (r reedSolomon) Encode(shards [][]byte) error {
+	if len(shards) != r.Shards {
+		return ErrTooFewShards
+	}
+
+	err := checkShards(shards, false)
+	if err != nil {
+		return err
+	}
+
+	// Get the slice of output buffers.
+	output := shards[r.DataShards:]
+
+	// Do the coding.
+	r.codeSomeShards(r.parity, shards[0:r.DataShards], output, r.ParityShards, len(shards[0]))
+	return nil
+}
+
+// ErrInvalidInput is returned if an invalid input parameter is passed to Update.
+var ErrInvalidInput = errors.New("invalid input")
+
+func (r reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
+	if len(shards) != r.Shards {
+		return ErrTooFewShards
+	}
+
+	if len(newDatashards) != r.DataShards {
+		return ErrTooFewShards
+	}
+
+	err := checkShards(shards, true)
+	if err != nil {
+		return err
+	}
+
+	err = checkShards(newDatashards, true)
+	if err != nil {
+		return err
+	}
+
+	for i := range newDatashards {
+		if newDatashards[i] != nil && shards[i] == nil {
+			return ErrInvalidInput
+		}
+	}
+	for _, p := range shards[r.DataShards:] {
+		if p == nil {
+			return ErrInvalidInput
+		}
+	}
+
+	shardSize := shardSize(shards)
+
+	// Get the slice of output buffers.
+	output := shards[r.DataShards:]
+
+	// Do the coding.
+	r.updateParityShards(r.parity, shards[0:r.DataShards], newDatashards[0:r.DataShards], output, r.ParityShards, shardSize)
+	return nil
+}
+
+func (r reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
+	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
+		r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount)
+		return
+	}
+
+	for c := 0; c < r.DataShards; c++ {
+		in := newinputs[c]
+		if in == nil {
+			continue
+		}
+		oldin := oldinputs[c]
+		// oldinputs data will be changed
+		sliceXor(in, oldin, r.o.useSSE2)
+		for iRow := 0; iRow < outputCount; iRow++ {
+			galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o)
+		}
+	}
+}
+
+func (r reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	start := 0
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+		wg.Add(1)
+		go func(start, stop int) {
+			for c := 0; c < r.DataShards; c++ {
+				in := newinputs[c]
+				if in == nil {
+					continue
+				}
+				oldin := oldinputs[c]
+				// oldinputs data will be changed
+				sliceXor(in[start:stop], oldin[start:stop], r.o.useSSE2)
+				for iRow := 0; iRow < outputCount; iRow++ {
+					galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o)
+				}
+			}
+			wg.Done()
+		}(start, start+do)
+		start += do
+	}
+	wg.Wait()
+}
+
+// Verify returns true if the parity shards contain the right data.
+// The data is the same format as Encode. No data is modified.
+func (r reedSolomon) Verify(shards [][]byte) (bool, error) {
+	if len(shards) != r.Shards {
+		return false, ErrTooFewShards
+	}
+	err := checkShards(shards, false)
+	if err != nil {
+		return false, err
+	}
+
+	// Slice of buffers being checked.
+	toCheck := shards[r.DataShards:]
+
+	// Do the checking.
+	return r.checkSomeShards(r.parity, shards[0:r.DataShards], toCheck, r.ParityShards, len(shards[0])), nil
+}
+
+// Multiplies a subset of rows from a coding matrix by a full set of
+// input shards to produce some output shards.
+// 'matrixRows' is the set of rows from the matrix to use.
+// 'inputs' is an array of byte arrays, each of which is one input shard.
+// The number of inputs used is determined by the length of each matrix row.
+// 'outputs' is an array of byte arrays where the computed shards are stored.
+// The number of outputs computed, and the number of matrix rows used,
+// is determined by outputCount, which is the number of outputs to compute.
+func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	if r.o.useAVX512 && len(inputs) >= 4 && len(outputs) >= 2 {
+		r.codeSomeShardsAvx512(matrixRows, inputs, outputs, outputCount, byteCount)
+		return
+	} else if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
+		r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
+		return
+	}
+	for c := 0; c < r.DataShards; c++ {
+		in := inputs[c]
+		for iRow := 0; iRow < outputCount; iRow++ {
+			if c == 0 {
+				galMulSlice(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+			} else {
+				galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+			}
+		}
+	}
+}
+
+// Perform the same as codeSomeShards, but split the workload into
+// several goroutines.
+func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	// Make sizes divisible by 32
+	do = (do + 31) & (^31)
+	start := 0
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+		wg.Add(1)
+		go func(start, stop int) {
+			for c := 0; c < r.DataShards; c++ {
+				in := inputs[c][start:stop]
+				for iRow := 0; iRow < outputCount; iRow++ {
+					if c == 0 {
+						galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:stop], &r.o)
+					} else {
+						galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:stop], &r.o)
+					}
+				}
+			}
+			wg.Done()
+		}(start, start+do)
+		start += do
+	}
+	wg.Wait()
+}
+
+// checkSomeShards is mostly the same as codeSomeShards,
+// except this will check values and return
+// as soon as a difference is found.
+func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
+	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
+		return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
+	}
+	outputs := make([][]byte, len(toCheck))
+	for i := range outputs {
+		outputs[i] = make([]byte, byteCount)
+	}
+	for c := 0; c < r.DataShards; c++ {
+		in := inputs[c]
+		for iRow := 0; iRow < outputCount; iRow++ {
+			galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+		}
+	}
+
+	for i, calc := range outputs {
+		if !bytes.Equal(calc, toCheck[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+func (r reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
+	same := true
+	var mu sync.RWMutex // For above
+
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	// Make sizes divisible by 32
+	do = (do + 31) & (^31)
+	start := 0
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+		wg.Add(1)
+		go func(start, do int) {
+			defer wg.Done()
+			outputs := make([][]byte, len(toCheck))
+			for i := range outputs {
+				outputs[i] = make([]byte, do)
+			}
+			for c := 0; c < r.DataShards; c++ {
+				mu.RLock()
+				if !same {
+					mu.RUnlock()
+					return
+				}
+				mu.RUnlock()
+				in := inputs[c][start : start+do]
+				for iRow := 0; iRow < outputCount; iRow++ {
+					galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+				}
+			}
+
+			for i, calc := range outputs {
+				if !bytes.Equal(calc, toCheck[i][start:start+do]) {
+					mu.Lock()
+					same = false
+					mu.Unlock()
+					return
+				}
+			}
+		}(start, do)
+		start += do
+	}
+	wg.Wait()
+	return same
+}
+
+// ErrShardNoData will be returned if there are no shards,
+// or if the length of all shards is zero.
+var ErrShardNoData = errors.New("no shard data")
+
+// ErrShardSize is returned if shard length isn't the same for all
+// shards.
+var ErrShardSize = errors.New("shard sizes do not match")
+
+// checkShards will check if shards are the same size
+// or 0, if allowed. An error is returned if this fails.
+// An error is also returned if all shards are size 0.
+func checkShards(shards [][]byte, nilok bool) error {
+	size := shardSize(shards)
+	if size == 0 {
+		return ErrShardNoData
+	}
+	for _, shard := range shards {
+		if len(shard) != size {
+			if len(shard) != 0 || !nilok {
+				return ErrShardSize
+			}
+		}
+	}
+	return nil
+}
+
+// shardSize returns the size of a single shard.
+// The first non-zero size is returned,
+// or 0 if all shards are size 0.
+func shardSize(shards [][]byte) int {
+	for _, shard := range shards {
+		if len(shard) != 0 {
+			return len(shard)
+		}
+	}
+	return 0
+}
+
+// Reconstruct will recreate the missing shards, if possible.
+//
+// Given a list of shards, some of which contain data, fills in the
+// ones that don't have data.
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil or zero-length.
+// If a shard is zero-length but has sufficient capacity, that memory will
+// be used, otherwise a new []byte will be allocated.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// The reconstructed shard set is complete, but integrity is not verified.
+// Use the Verify function to check if data set is ok.
+func (r reedSolomon) Reconstruct(shards [][]byte) error {
+	return r.reconstruct(shards, false)
+}
+
+// ReconstructData will recreate any missing data shards, if possible.
+//
+// Given a list of shards, some of which contain data, fills in the
+// data shards that don't have data.
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil or zero-length.
+// If a shard is zero-length but has sufficient capacity, that memory will
+// be used, otherwise a new []byte will be allocated.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// As the reconstructed shard set may contain missing parity shards,
+// calling the Verify function is likely to fail.
+func (r reedSolomon) ReconstructData(shards [][]byte) error {
+	return r.reconstruct(shards, true)
+}
+
+// reconstruct will recreate the missing data shards, and unless
+// dataOnly is true, also the missing parity shards
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+func (r reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
+	if len(shards) != r.Shards {
+		return ErrTooFewShards
+	}
+	// Check arguments.
+	err := checkShards(shards, true)
+	if err != nil {
+		return err
+	}
+
+	shardSize := shardSize(shards)
+
+	// Quick check: are all of the shards present?  If so, there's
+	// nothing to do.
+	numberPresent := 0
+	for i := 0; i < r.Shards; i++ {
+		if len(shards[i]) != 0 {
+			numberPresent++
+		}
+	}
+	if numberPresent == r.Shards {
+		// Cool.  All of the shards have data.  We don't
+		// need to do anything.
+		return nil
+	}
+
+	// More complete sanity check
+	if numberPresent < r.DataShards {
+		return ErrTooFewShards
+	}
+
+	// Pull out an array holding just the shards that
+	// correspond to the rows of the submatrix.  These shards
+	// will be the input to the decoding process that re-creates
+	// the missing data shards.
+	//
+	// Also, create an array of indices of the valid rows we do have
+	// and the invalid rows we don't have up until we have enough valid rows.
+	subShards := make([][]byte, r.DataShards)
+	validIndices := make([]int, r.DataShards)
+	invalidIndices := make([]int, 0)
+	subMatrixRow := 0
+	for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ {
+		if len(shards[matrixRow]) != 0 {
+			subShards[subMatrixRow] = shards[matrixRow]
+			validIndices[subMatrixRow] = matrixRow
+			subMatrixRow++
+		} else {
+			invalidIndices = append(invalidIndices, matrixRow)
+		}
+	}
+
+	// Attempt to get the cached inverted matrix out of the tree
+	// based on the indices of the invalid rows.
+	dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices)
+
+	// If the inverted matrix isn't cached in the tree yet we must
+	// construct it ourselves and insert it into the tree for the
+	// future.  In this way the inversion tree is lazily loaded.
+	if dataDecodeMatrix == nil {
+		// Pull out the rows of the matrix that correspond to the
+		// shards that we have and build a square matrix.  This
+		// matrix could be used to generate the shards that we have
+		// from the original data.
+		subMatrix, _ := newMatrix(r.DataShards, r.DataShards)
+		for subMatrixRow, validIndex := range validIndices {
+			for c := 0; c < r.DataShards; c++ {
+				subMatrix[subMatrixRow][c] = r.m[validIndex][c]
+			}
+		}
+		// Invert the matrix, so we can go from the encoded shards
+		// back to the original data.  Then pull out the row that
+		// generates the shard that we want to decode.  Note that
+		// since this matrix maps back to the original data, it can
+		// be used to create a data shard, but not a parity shard.
+		dataDecodeMatrix, err = subMatrix.Invert()
+		if err != nil {
+			return err
+		}
+
+		// Cache the inverted matrix in the tree for future use keyed on the
+		// indices of the invalid rows.
+		err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.Shards)
+		if err != nil {
+			return err
+		}
+	}
+
+	// Re-create any data shards that were missing.
+	//
+	// The input to the coding is all of the shards we actually
+	// have, and the output is the missing data shards.  The computation
+	// is done using the special decode matrix we just built.
+	outputs := make([][]byte, r.ParityShards)
+	matrixRows := make([][]byte, r.ParityShards)
+	outputCount := 0
+
+	for iShard := 0; iShard < r.DataShards; iShard++ {
+		if len(shards[iShard]) == 0 {
+			if cap(shards[iShard]) >= shardSize {
+				shards[iShard] = shards[iShard][0:shardSize]
+			} else {
+				shards[iShard] = make([]byte, shardSize)
+			}
+			outputs[outputCount] = shards[iShard]
+			matrixRows[outputCount] = dataDecodeMatrix[iShard]
+			outputCount++
+		}
+	}
+	r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], outputCount, shardSize)
+
+	if dataOnly {
+		// Exit out early if we are only interested in the data shards
+		return nil
+	}
+
+	// Now that we have all of the data shards intact, we can
+	// compute any of the parity that is missing.
+	//
+	// The input to the coding is ALL of the data shards, including
+	// any that we just calculated.  The output is whichever of the
+	// parity shards were missing.
+	outputCount = 0
+	for iShard := r.DataShards; iShard < r.Shards; iShard++ {
+		if len(shards[iShard]) == 0 {
+			if cap(shards[iShard]) >= shardSize {
+				shards[iShard] = shards[iShard][0:shardSize]
+			} else {
+				shards[iShard] = make([]byte, shardSize)
+			}
+			outputs[outputCount] = shards[iShard]
+			matrixRows[outputCount] = r.parity[iShard-r.DataShards]
+			outputCount++
+		}
+	}
+	r.codeSomeShards(matrixRows, shards[:r.DataShards], outputs[:outputCount], outputCount, shardSize)
+	return nil
+}
+
+// ErrShortData will be returned by Split(), if there isn't enough data
+// to fill the number of shards.
+var ErrShortData = errors.New("not enough data to fill the number of requested shards")
+
+// Split a data slice into the number of shards given to the encoder,
+// and create empty parity shards if necessary.
+//
+// The data will be split into equally sized shards.
+// If the data size isn't divisible by the number of shards,
+// the last shard will contain extra zeros.
+//
+// There must be at least 1 byte otherwise ErrShortData will be
+// returned.
+//
+// The data will not be copied, except for the last shard, so you
+// should not modify the data of the input slice afterwards.
+func (r reedSolomon) Split(data []byte) ([][]byte, error) {
+	if len(data) == 0 {
+		return nil, ErrShortData
+	}
+	// Calculate number of bytes per data shard.
+	perShard := (len(data) + r.DataShards - 1) / r.DataShards
+
+	if cap(data) > len(data) {
+		data = data[:cap(data)]
+	}
+
+	// Only allocate memory if necessary
+	if len(data) < (r.Shards * perShard) {
+		// Pad data to r.Shards*perShard.
+		padding := make([]byte, (r.Shards*perShard)-len(data))
+		data = append(data, padding...)
+	}
+
+	// Split into equal-length shards.
+	dst := make([][]byte, r.Shards)
+	for i := range dst {
+		dst[i] = data[:perShard]
+		data = data[perShard:]
+	}
+
+	return dst, nil
+}
+
+// ErrReconstructRequired is returned if too few data shards are intact and a
+// reconstruction is required before you can successfully join the shards.
+var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil")
+
+// Join the shards and write the data segment to dst.
+//
+// Only the data shards are considered.
+// You must supply the exact output size you want.
+//
+// If there are too few shards given, ErrTooFewShards will be returned.
+// If the total data size is less than outSize, ErrShortData will be returned.
+// If one or more required data shards are nil, ErrReconstructRequired will be returned.
+func (r reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
+	// Do we have enough shards?
+	if len(shards) < r.DataShards {
+		return ErrTooFewShards
+	}
+	shards = shards[:r.DataShards]
+
+	// Do we have enough data?
+	size := 0
+	for _, shard := range shards {
+		if shard == nil {
+			return ErrReconstructRequired
+		}
+		size += len(shard)
+
+		// Do we have enough data already?
+		if size >= outSize {
+			break
+		}
+	}
+	if size < outSize {
+		return ErrShortData
+	}
+
+	// Copy data to dst
+	write := outSize
+	for _, shard := range shards {
+		if write < len(shard) {
+			_, err := dst.Write(shard[:write])
+			return err
+		}
+		n, err := dst.Write(shard)
+		if err != nil {
+			return err
+		}
+		write -= n
+	}
+	return nil
+}
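
As an end-to-end illustration of the API in this file (a sketch, not part of the vendored code), the typical flow is Split and Encode, then Reconstruct after losing shards, and finally Join:

    package main

    import (
        "bytes"
        "log"

        "github.com/klauspost/reedsolomon"
    )

    func main() {
        enc, err := reedsolomon.New(4, 2) // 4 data shards, 2 parity shards
        if err != nil {
            log.Fatal(err)
        }

        data := bytes.Repeat([]byte("hello gopher "), 100)

        // Split pads and slices the input into 6 shards; Encode fills the parity.
        shards, err := enc.Split(data)
        if err != nil {
            log.Fatal(err)
        }
        if err := enc.Encode(shards); err != nil {
            log.Fatal(err)
        }

        // Simulate losing two shards, then reconstruct them.
        shards[0], shards[5] = nil, nil
        if err := enc.Reconstruct(shards); err != nil {
            log.Fatal(err)
        }

        // Join writes the original data (without padding) back out.
        var buf bytes.Buffer
        if err := enc.Join(&buf, shards, len(data)); err != nil {
            log.Fatal(err)
        }
        log.Println("recovered", buf.Len(), "bytes")
    }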

+ 584 - 0
vendor/github.com/klauspost/reedsolomon/streaming.go

@@ -0,0 +1,584 @@
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+package reedsolomon
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"sync"
+)
+
+// StreamEncoder is an interface to encode Reed-Solomon parity sets for your data.
+// It provides a fully streaming interface, and processes data in blocks of up to 4MB.
+//
+// For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
+// since the streaming interface has a start up overhead.
+//
+// For all operations, readers and writers should not assume any order/size of
+// individual reads/writes.
+//
+// For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the examples
+// folder.
+type StreamEncoder interface {
+	// Encode parity shards for a set of data shards.
+	//
+	// Input is 'shards' containing readers for data shards followed by parity shards
+	// io.Writer.
+	//
+	// The number of shards must match the number given to NewStream().
+	//
+	// Each reader must supply the same number of bytes.
+	//
+	// The parity shards will be written to the writer.
+	// The number of bytes written will match the input size.
+	//
+	// If a data stream returns an error, a StreamReadError type error
+	// will be returned. If a parity writer returns an error, a
+	// StreamWriteError will be returned.
+	Encode(data []io.Reader, parity []io.Writer) error
+
+	// Verify returns true if the parity shards contain correct data.
+	//
+	// The number of shards must match the number total data+parity shards
+	// given to NewStream().
+	//
+	// Each reader must supply the same number of bytes.
+	// If a shard stream returns an error, a StreamReadError type error
+	// will be returned.
+	Verify(shards []io.Reader) (bool, error)
+
+	// Reconstruct will recreate the missing shards if possible.
+	//
+	// Given a list of valid shards (to read) and invalid shards (to write)
+	//
+	// You indicate that a shard is missing by setting it to nil in the 'valid'
+	// slice and at the same time setting a non-nil writer in "fill".
+	// An index cannot contain both non-nil 'valid' and 'fill' entry.
+	// If both are provided 'ErrReconstructMismatch' is returned.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// The reconstructed shard set is complete, but integrity is not verified.
+	// Use the Verify function to check if data set is ok.
+	Reconstruct(valid []io.Reader, fill []io.Writer) error
+
+	// Split an input stream into the number of shards given to the encoder.
+	//
+	// The data will be split into equally sized shards.
+	// If the data size isn't divisible by the number of shards,
+	// the last shard will contain extra zeros.
+	//
+	// You must supply the total size of your input.
+	// 'ErrShortData' will be returned if it is unable to retrieve the
+	// number of bytes indicated.
+	Split(data io.Reader, dst []io.Writer, size int64) (err error)
+
+	// Join the shards and write the data segment to dst.
+	//
+	// Only the data shards are considered.
+	//
+	// You must supply the exact output size you want.
+	// If there are too few shards given, ErrTooFewShards will be returned.
+	// If the total data size is less than outSize, ErrShortData will be returned.
+	Join(dst io.Writer, shards []io.Reader, outSize int64) error
+}
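
A minimal streaming round-trip, sketched with in-memory readers and writers (illustrative values; NewStream is defined further down in this file):

    package main

    import (
        "bytes"
        "io"
        "log"

        "github.com/klauspost/reedsolomon"
    )

    func main() {
        enc, err := reedsolomon.NewStream(4, 2)
        if err != nil {
            log.Fatal(err)
        }

        // Four equally sized data shards, backed by memory for the sketch.
        payload := bytes.Repeat([]byte("0123456789abcdef"), 1024)
        per := len(payload) / 4
        data := make([]io.Reader, 4)
        for i := range data {
            data[i] = bytes.NewReader(payload[i*per : (i+1)*per])
        }

        // Two parity shards written to in-memory buffers.
        var p0, p1 bytes.Buffer
        if err := enc.Encode(data, []io.Writer{&p0, &p1}); err != nil {
            log.Fatal(err)
        }
        log.Println("parity shard size:", p0.Len())
    }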
+
+// StreamReadError is returned when a read error is encountered
+// that relates to a supplied stream.
+// This will allow you to find out which reader has failed.
+type StreamReadError struct {
+	Err    error // The error
+	Stream int   // The stream number on which the error occurred
+}
+
+// Error returns the error as a string
+func (s StreamReadError) Error() string {
+	return fmt.Sprintf("error reading stream %d: %s", s.Stream, s.Err)
+}
+
+// String returns the error as a string
+func (s StreamReadError) String() string {
+	return s.Error()
+}
+
+// StreamWriteError is returned when a write error is encountered
+// that relates to a supplied stream. This will allow you to
+// find out which writer has failed.
+type StreamWriteError struct {
+	Err    error // The error
+	Stream int   // The stream number on which the error occurred
+}
+
+// Error returns the error as a string
+func (s StreamWriteError) Error() string {
+	return fmt.Sprintf("error writing stream %d: %s", s.Stream, s.Err)
+}
+
+// String returns the error as a string
+func (s StreamWriteError) String() string {
+	return s.Error()
+}
+
+// rsStream contains a matrix for a specific
+// distribution of data shards and parity shards.
+// Construct it using NewStream().
+type rsStream struct {
+	r  *reedSolomon
+	bs int // Block size
+	// Shard reader
+	readShards func(dst [][]byte, in []io.Reader) error
+	// Shard writer
+	writeShards func(out []io.Writer, in [][]byte) error
+	creads      bool
+	cwrites     bool
+}
+
+// NewStream creates a new encoder and initializes it to
+// the number of data shards and parity shards that
+// you want to use. You can reuse this encoder.
+// Note that the maximum number of total shards is 256.
+func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
+	enc, err := New(dataShards, parityShards, o...)
+	if err != nil {
+		return nil, err
+	}
+	rs := enc.(*reedSolomon)
+	r := rsStream{r: rs, bs: 4 << 20}
+	r.readShards = readShards
+	r.writeShards = writeShards
+	return &r, err
+}
+
+// NewStreamC creates a new encoder and initializes it to
+// the number of data shards and parity shards given.
+//
+// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
+func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
+	enc, err := New(dataShards, parityShards, o...)
+	if err != nil {
+		return nil, err
+	}
+	rs := enc.(*reedSolomon)
+	r := rsStream{r: rs, bs: 4 << 20}
+	r.readShards = readShards
+	r.writeShards = writeShards
+	if conReads {
+		r.readShards = cReadShards
+	}
+	if conWrites {
+		r.writeShards = cWriteShards
+	}
+	return &r, err
+}
+
+func createSlice(n, length int) [][]byte {
+	out := make([][]byte, n)
+	for i := range out {
+		out[i] = make([]byte, length)
+	}
+	return out
+}
+
+// Encodes parity shards for a set of data shards.
+//
+// Input is 'shards' containing readers for data shards followed by parity shards
+// io.Writer.
+//
+// The number of shards must match the number given to NewStream().
+//
+// Each reader must supply the same number of bytes.
+//
+// The parity shards will be written to the writer.
+// The number of bytes written will match the input size.
+//
+// If a data stream returns an error, a StreamReadError type error
+// will be returned. If a parity writer returns an error, a
+// StreamWriteError will be returned.
+func (r rsStream) Encode(data []io.Reader, parity []io.Writer) error {
+	if len(data) != r.r.DataShards {
+		return ErrTooFewShards
+	}
+
+	if len(parity) != r.r.ParityShards {
+		return ErrTooFewShards
+	}
+
+	all := createSlice(r.r.Shards, r.bs)
+	in := all[:r.r.DataShards]
+	out := all[r.r.DataShards:]
+	read := 0
+
+	for {
+		err := r.readShards(in, data)
+		switch err {
+		case nil:
+		case io.EOF:
+			if read == 0 {
+				return ErrShardNoData
+			}
+			return nil
+		default:
+			return err
+		}
+		out = trimShards(out, shardSize(in))
+		read += shardSize(in)
+		err = r.r.Encode(all)
+		if err != nil {
+			return err
+		}
+		err = r.writeShards(parity, out)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// Trim the shards so they are all the same size
+func trimShards(in [][]byte, size int) [][]byte {
+	for i := range in {
+		if in[i] != nil {
+			in[i] = in[i][0:size]
+		}
+		if len(in[i]) < size {
+			in[i] = nil
+		}
+	}
+	return in
+}
+
+func readShards(dst [][]byte, in []io.Reader) error {
+	if len(in) != len(dst) {
+		panic("internal error: in and dst size do not match")
+	}
+	size := -1
+	for i := range in {
+		if in[i] == nil {
+			dst[i] = nil
+			continue
+		}
+		n, err := io.ReadFull(in[i], dst[i])
+		// The error is EOF only if no bytes were read.
+		// If an EOF happens after reading some but not all the bytes,
+		// ReadFull returns ErrUnexpectedEOF.
+		switch err {
+		case io.ErrUnexpectedEOF, io.EOF:
+			if size < 0 {
+				size = n
+			} else if n != size {
+				// Shard sizes must match.
+				return ErrShardSize
+			}
+			dst[i] = dst[i][0:n]
+		case nil:
+			continue
+		default:
+			return StreamReadError{Err: err, Stream: i}
+		}
+	}
+	if size == 0 {
+		return io.EOF
+	}
+	return nil
+}
+
+func writeShards(out []io.Writer, in [][]byte) error {
+	if len(out) != len(in) {
+		panic("internal error: in and out size do not match")
+	}
+	for i := range in {
+		if out[i] == nil {
+			continue
+		}
+		n, err := out[i].Write(in[i])
+		if err != nil {
+			return StreamWriteError{Err: err, Stream: i}
+		}
+		// Make sure the whole shard was written.
+		if n != len(in[i]) {
+			return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
+		}
+	}
+	return nil
+}
+
+type readResult struct {
+	n    int
+	size int
+	err  error
+}
+
+// cReadShards reads shards concurrently
+func cReadShards(dst [][]byte, in []io.Reader) error {
+	if len(in) != len(dst) {
+		panic("internal error: in and dst size do not match")
+	}
+	var wg sync.WaitGroup
+	wg.Add(len(in))
+	res := make(chan readResult, len(in))
+	for i := range in {
+		if in[i] == nil {
+			dst[i] = nil
+			wg.Done()
+			continue
+		}
+		go func(i int) {
+			defer wg.Done()
+			n, err := io.ReadFull(in[i], dst[i])
+			// The error is EOF only if no bytes were read.
+			// If an EOF happens after reading some but not all the bytes,
+			// ReadFull returns ErrUnexpectedEOF.
+			res <- readResult{size: n, err: err, n: i}
+
+		}(i)
+	}
+	wg.Wait()
+	close(res)
+	size := -1
+	for r := range res {
+		switch r.err {
+		case io.ErrUnexpectedEOF, io.EOF:
+			if size < 0 {
+				size = r.size
+			} else if r.size != size {
+				// Shard sizes must match.
+				return ErrShardSize
+			}
+			dst[r.n] = dst[r.n][0:r.size]
+		case nil:
+		default:
+			return StreamReadError{Err: r.err, Stream: r.n}
+		}
+	}
+	if size == 0 {
+		return io.EOF
+	}
+	return nil
+}
+
+// cWriteShards writes shards concurrently
+func cWriteShards(out []io.Writer, in [][]byte) error {
+	if len(out) != len(in) {
+		panic("internal error: in and out size do not match")
+	}
+	var errs = make(chan error, len(out))
+	var wg sync.WaitGroup
+	wg.Add(len(out))
+	for i := range in {
+		go func(i int) {
+			defer wg.Done()
+			if out[i] == nil {
+				errs <- nil
+				return
+			}
+			n, err := out[i].Write(in[i])
+			if err != nil {
+				errs <- StreamWriteError{Err: err, Stream: i}
+				return
+			}
+			if n != len(in[i]) {
+				errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
+			}
+		}(i)
+	}
+	wg.Wait()
+	close(errs)
+	for err := range errs {
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
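
The concurrent readers and writers above share a common Go fan-in shape: one goroutine per stream, a buffered channel sized to the number of goroutines so sends never block, and results inspected only after wg.Wait(). A minimal, self-contained sketch of that pattern (independent of this package; the names are illustrative) might look like:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"sync"
)

// writeAll writes each buffer to the matching writer concurrently and
// returns the first error collected, mirroring the cWriteShards shape.
func writeAll(out []io.Writer, in [][]byte) error {
	errs := make(chan error, len(out)) // buffered: goroutines never block on send
	var wg sync.WaitGroup
	wg.Add(len(out))
	for i := range in {
		go func(i int) {
			defer wg.Done()
			if out[i] == nil {
				errs <- nil
				return
			}
			_, err := out[i].Write(in[i])
			errs <- err
		}(i)
	}
	wg.Wait()
	close(errs)
	for err := range errs {
		if err != nil {
			return err
		}
	}
	return nil
}

func main() {
	bufs := []*bytes.Buffer{{}, {}, {}}
	out := make([]io.Writer, len(bufs))
	for i := range bufs {
		out[i] = bufs[i]
	}
	err := writeAll(out, [][]byte{[]byte("a"), []byte("b"), []byte("c")})
	fmt.Println(err, bufs[0].String(), bufs[1].String(), bufs[2].String())
}
```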
+
+// Verify returns true if the parity shards contain correct data.
+//
+// The number of shards must match the total number of data+parity shards
+// given to NewStream().
+//
+// Each reader must supply the same number of bytes.
+// If a shard stream returns an error, a StreamReadError type error
+// will be returned.
+func (r rsStream) Verify(shards []io.Reader) (bool, error) {
+	if len(shards) != r.r.Shards {
+		return false, ErrTooFewShards
+	}
+
+	read := 0
+	all := createSlice(r.r.Shards, r.bs)
+	for {
+		err := r.readShards(all, shards)
+		if err == io.EOF {
+			if read == 0 {
+				return false, ErrShardNoData
+			}
+			return true, nil
+		}
+		if err != nil {
+			return false, err
+		}
+		read += shardSize(all)
+		ok, err := r.r.Verify(all)
+		if !ok || err != nil {
+			return ok, err
+		}
+	}
+}
+
+// ErrReconstructMismatch is returned by the StreamEncoder if you supply
+// both a "valid" and a "fill" stream at the same index, because it is
+// then impossible to tell whether you consider the shard valid or want
+// it reconstructed.
+var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive")
+
+// Reconstruct will recreate the missing shards if possible.
+//
+// Given a list of valid shards (to read) and invalid shards (to write),
+// you indicate that a shard is missing by setting it to nil in the 'valid'
+// slice and at the same time setting a non-nil writer in 'fill'.
+// An index cannot contain both a non-nil 'valid' and 'fill' entry.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// The reconstructed shard set is only complete if all missing shards
+// are explicitly asked for; its integrity is not automatically verified.
+// Use the Verify function to check that the data set is complete.
+func (r rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
+	if len(valid) != r.r.Shards {
+		return ErrTooFewShards
+	}
+	if len(fill) != r.r.Shards {
+		return ErrTooFewShards
+	}
+
+	all := createSlice(r.r.Shards, r.bs)
+	reconDataOnly := true
+	for i := range valid {
+		if valid[i] != nil && fill[i] != nil {
+			return ErrReconstructMismatch
+		}
+		if i >= r.r.DataShards && fill[i] != nil {
+			reconDataOnly = false
+		}
+	}
+
+	read := 0
+	for {
+		err := r.readShards(all, valid)
+		if err == io.EOF {
+			if read == 0 {
+				return ErrShardNoData
+			}
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		read += shardSize(all)
+		all = trimShards(all, shardSize(all))
+
+		if reconDataOnly {
+			err = r.r.ReconstructData(all) // just reconstruct missing data shards
+		} else {
+			err = r.r.Reconstruct(all) // reconstruct all missing shards
+		}
+		if err != nil {
+			return err
+		}
+		err = r.writeShards(fill, all)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// Join the shards and write the data segment to dst.
+//
+// Only the data shards are considered.
+//
+// You must supply the exact output size you want.
+// If too few shards are given, ErrTooFewShards will be returned.
+// If the total data size is less than outSize, ErrShortData will be returned.
+func (r rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
+	// Do we have enough shards?
+	if len(shards) < r.r.DataShards {
+		return ErrTooFewShards
+	}
+
+	// Trim off parity shards if any
+	shards = shards[:r.r.DataShards]
+	for i := range shards {
+		if shards[i] == nil {
+			return StreamReadError{Err: ErrShardNoData, Stream: i}
+		}
+	}
+	// Join all shards
+	src := io.MultiReader(shards...)
+
+	// Copy data to dst
+	n, err := io.CopyN(dst, src, outSize)
+	if err == io.EOF {
+		return ErrShortData
+	}
+	if err != nil {
+		return err
+	}
+	if n != outSize {
+		return ErrShortData
+	}
+	return nil
+}
+
+// Split an input stream into the number of shards given to the encoder.
+//
+// The data will be split into equally sized shards.
+// If the data size isn't divisible by the number of shards,
+// the last shard will contain extra zeros.
+//
+// You must supply the total size of your input.
+// 'ErrShortData' will be returned if it is unable to retrieve the
+// number of bytes indicated.
+func (r rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
+	if size == 0 {
+		return ErrShortData
+	}
+	if len(dst) != r.r.DataShards {
+		return ErrInvShardNum
+	}
+
+	for i := range dst {
+		if dst[i] == nil {
+			return StreamWriteError{Err: ErrShardNoData, Stream: i}
+		}
+	}
+
+	// Calculate number of bytes per shard.
+	perShard := (size + int64(r.r.DataShards) - 1) / int64(r.r.DataShards)
+
+	// Pad data to r.Shards*perShard.
+	padding := make([]byte, (int64(r.r.Shards)*perShard)-size)
+	data = io.MultiReader(data, bytes.NewBuffer(padding))
+
+	// Split into equal-length shards and copy.
+	for i := range dst {
+		n, err := io.CopyN(dst[i], data, perShard)
+		if err != io.EOF && err != nil {
+			return err
+		}
+		if n != perShard {
+			return ErrShortData
+		}
+	}
+
+	return nil
+}
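
Taken together, Split, Encode, Verify, Reconstruct and Join form the streaming counterpart of the in-memory encoder. A minimal end-to-end sketch using the public github.com/klauspost/reedsolomon package (which this vendored copy tracks) is shown below; the shard counts and payload are illustrative, and errors are simply panicked on.

```go
package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/klauspost/reedsolomon"
)

func main() {
	const dataShards, parityShards = 4, 2
	enc, err := reedsolomon.NewStream(dataShards, parityShards)
	if err != nil {
		panic(err)
	}

	payload := []byte("a payload that will be split into data shards and protected by parity")

	// Split pads the input so every data shard has the same length.
	data := make([]*bytes.Buffer, dataShards)
	dataW := make([]io.Writer, dataShards)
	for i := range data {
		data[i] = &bytes.Buffer{}
		dataW[i] = data[i]
	}
	if err := enc.Split(bytes.NewReader(payload), dataW, int64(len(payload))); err != nil {
		panic(err)
	}

	// Encode computes the parity shards from the data shards.
	dataR := make([]io.Reader, dataShards)
	for i := range data {
		dataR[i] = bytes.NewReader(data[i].Bytes())
	}
	parity := make([]*bytes.Buffer, parityShards)
	parityW := make([]io.Writer, parityShards)
	for i := range parity {
		parity[i] = &bytes.Buffer{}
		parityW[i] = parity[i]
	}
	if err := enc.Encode(dataR, parityW); err != nil {
		panic(err)
	}

	// Verify wants fresh readers over all data+parity shards.
	all := make([]io.Reader, 0, dataShards+parityShards)
	for _, b := range data {
		all = append(all, bytes.NewReader(b.Bytes()))
	}
	for _, b := range parity {
		all = append(all, bytes.NewReader(b.Bytes()))
	}
	ok, err := enc.Verify(all)
	fmt.Println(ok, err) // true <nil> if the shards are consistent
}
```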

+ 0 - 9
vendor/github.com/templexxx/reedsolomon/.travis.yml

@@ -1,9 +0,0 @@
-language: go
-go:
-    - 1.9
-
-install:
-    - go get github.com/templexxx/reedsolomon
-
-script:
-    - go test -v

+ 0 - 108
vendor/github.com/templexxx/reedsolomon/README.md

@@ -1,108 +0,0 @@
-# Reed-Solomon
-
-[![GoDoc][1]][2] [![MIT licensed][3]][4] [![Build Status][5]][6] [![Go Report Card][7]][8] 
-
-[1]: https://godoc.org/github.com/templexxx/reedsolomon?status.svg
-[2]: https://godoc.org/github.com/templexxx/reedsolomon
-[3]: https://img.shields.io/badge/license-MIT-blue.svg
-[4]: LICENSE
-[5]: https://travis-ci.org/templexxx/reedsolomon.svg?branch=master
-[6]: https://travis-ci.org/templexxx/reedsolomon
-[7]: https://goreportcard.com/badge/github.com/templexxx/reedsolomon
-[8]: https://goreportcard.com/report/github.com/templexxx/reedsolomon
-
-
-## Introduction:
-1.  Reed-Solomon Erasure Code engine in pure Go.
-2.  Super Fast: more than 10GB/s per physics core ( 10+4, 4KB per vector, Macbook Pro 2.8 GHz Intel Core i7 )
-
-## Installation
-To get the package use the standard:
-```bash
-go get github.com/templexxx/reedsolomon
-```
-
-## Documentation
-See the associated [GoDoc](http://godoc.org/github.com/templexxx/reedsolomon)
-
-## Specification
-### GOARCH
-1. All arch are supported
-2. 0.1.0 need go1.9 for sync.Map in AMD64
-
-### Math
-1. Coding over in GF(2^8)
-2. Primitive Polynomial: x^8 + x^4 + x^3 + x^2 + 1 (0x1d)
-3. mathtool/gentbls.go : generator Primitive Polynomial and it's log table, exp table, multiply table, inverse table etc. We can get more info about how galois field work
-4. mathtool/cntinverse.go : calculate how many inverse matrix will have in different RS codes config
-5. Both of Cauchy and Vandermonde Matrix are supported. Vandermonde need more operations for preserving the property that any square subset of rows is invertible
-
-### Why so fast?
-These three parts will cost too much time:
-
-1. lookup galois-field tables
-2. read/write memory
-3. calculate inverse matrix in the reconstruct process
-
-SIMD will solve no.1
-
-Cache-friendly codes will help to solve no.2 & no.3, and more, use a sync.Map for cache inverse matrix, it will help to save about 1000ns when we need same matrix. 
-
-## Performance
-
-Performance depends mainly on:
-
-1. CPU instruction extension( AVX2 or SSSE3 or none )
-2. number of data/parity vects
-3. unit size of calculation ( see it in rs_amd64.go )
-4. size of shards
-5. speed of memory (waste so much time on read/write mem, :D )
-6. performance of CPU
-7. the way of using ( reuse memory)
-
-And we must know the benchmark test is quite different with encoding/decoding in practice.
-
-Because in benchmark test loops, the CPU Cache will help a lot. In practice, we must reuse the memory to make the performance become as good as the benchmark test.
-
-Example of performance on my MacBook 2017 i7 2.8GHz. 10+4 (with 0.1.0).
-
-### Encoding:
-
-| Vector size | Speed (MB/S) |
-|----------------|--------------|
-| 1400B              |    7655.02  |
-| 4KB              |       10551.37  |
-| 64KB              |       9297.25 |
-| 1MB              |      6829.89 |
-| 16MB              |      6312.83 |
-
-### Reconstruct (use nil to point which one need repair):
-
-| Vector size | Speed (MB/S) |
-|----------------|--------------|
-| 1400B              |    4124.85  |
-| 4KB              |       5715.45 |
-| 64KB              |       6050.06 |
-| 1MB              |      5001.21 |
-| 16MB              |      5043.04 |
-
-### ReconstructWithPos (use a position list to point which one need repair, reuse the memory):
-
-| Vector size | Speed (MB/S) |
-|----------------|--------------|
-| 1400B              |    6170.24  |
-| 4KB              |       9444.86 |
-| 64KB              |       9311.30 |
-| 1MB              |      6781.06 |
-| 16MB              |      6285.34 |
-
-**reconstruct benchmark tests here run with inverse matrix cache, if there is no cache, it will cost more time( about 1000ns)**
-
-## Who is using this?
-
-1. https://github.com/xtaci/kcp-go -- A Production-Grade Reliable-UDP Library for golang
-
-## Links & Thanks
-* [Klauspost ReedSolomon](https://github.com/klauspost/reedsolomon)
-* [intel ISA-L](https://github.com/01org/isa-l)
-* [GF SIMD] (http://www.ssrc.ucsc.edu/papers/plank-fast13.pdf)

+ 0 - 156
vendor/github.com/templexxx/reedsolomon/matrix.go

@@ -1,156 +0,0 @@
-package reedsolomon
-
-import "errors"
-
-type matrix []byte
-
-func genEncMatrixCauchy(d, p int) matrix {
-	t := d + p
-	m := make([]byte, t*d)
-	for i := 0; i < d; i++ {
-		m[i*d+i] = byte(1)
-	}
-
-	d2 := d * d
-	for i := d; i < t; i++ {
-		for j := 0; j < d; j++ {
-			d := i ^ j
-			a := inverseTbl[d]
-			m[d2] = byte(a)
-			d2++
-		}
-	}
-	return m
-}
-
-func gfExp(b byte, n int) byte {
-	if n == 0 {
-		return 1
-	}
-	if b == 0 {
-		return 0
-	}
-	a := logTbl[b]
-	ret := int(a) * n
-	for ret >= 255 {
-		ret -= 255
-	}
-	return byte(expTbl[ret])
-}
-
-func genVandMatrix(vm []byte, t, d int) {
-	for i := 0; i < t; i++ {
-		for j := 0; j < d; j++ {
-			vm[i*d+j] = gfExp(byte(i), j)
-		}
-	}
-}
-
-func (m matrix) mul(right matrix, rows, cols int, r []byte) {
-	for i := 0; i < rows; i++ {
-		for j := 0; j < cols; j++ {
-			var v byte
-			for k := 0; k < cols; k++ {
-				v ^= gfMul(m[i*cols+k], right[k*cols+j])
-			}
-			r[i*cols+j] = v
-		}
-	}
-}
-
-func genEncMatrixVand(d, p int) (matrix, error) {
-	t := d + p
-	buf := make([]byte, (2*t+4*d)*d)
-	vm := buf[:t*d]
-	genVandMatrix(vm, t, d)
-	top := buf[t*d : (t+d)*d]
-	copy(top, vm[:d*d])
-	raw := buf[(t+d)*d : (t+3*d)*d]
-	im := buf[(t+3*d)*d : (t+4*d)*d]
-	err := matrix(top).invert(raw, d, im)
-	if err != nil {
-		return nil, err
-	}
-	r := buf[(t+4*d)*d : (2*t+4*d)*d]
-	matrix(vm).mul(im, t, d, r)
-	return matrix(r), nil
-}
-
-// [I|m'] -> [m']
-func (m matrix) subMatrix(n int, r []byte) {
-	for i := 0; i < n; i++ {
-		off := i * n
-		copy(r[off:off+n], m[2*off+n:2*(off+n)])
-	}
-}
-
-func (m matrix) invert(raw matrix, n int, im []byte) error {
-	// [m] -> [m|I]
-	for i := 0; i < n; i++ {
-		t := i * n
-		copy(raw[2*t:2*t+n], m[t:t+n])
-		raw[2*t+i+n] = byte(1)
-	}
-	err := gauss(raw, n)
-	if err != nil {
-		return err
-	}
-	raw.subMatrix(n, im)
-	return nil
-}
-
-func (m matrix) swap(i, j, n int) {
-	for k := 0; k < n; k++ {
-		m[i*n+k], m[j*n+k] = m[j*n+k], m[i*n+k]
-	}
-}
-
-func gfMul(a, b byte) byte {
-	return mulTbl[a][b]
-}
-
-var errSingular = errors.New("rs.invert: matrix is singular")
-
-// [m|I] -> [I|m']
-func gauss(m matrix, n int) error {
-	n2 := 2 * n
-	for i := 0; i < n; i++ {
-		if m[i*n2+i] == 0 {
-			for j := i + 1; j < n; j++ {
-				if m[j*n2+i] != 0 {
-					m.swap(i, j, n2)
-					break
-				}
-			}
-		}
-		if m[i*n2+i] == 0 {
-			return errSingular
-		}
-		if m[i*n2+i] != 1 {
-			d := m[i*n2+i]
-			scale := inverseTbl[d]
-			for c := 0; c < n2; c++ {
-				m[i*n2+c] = gfMul(m[i*n2+c], scale)
-			}
-		}
-		for j := i + 1; j < n; j++ {
-			if m[j*n2+i] != 0 {
-				scale := m[j*n2+i]
-				for c := 0; c < n2; c++ {
-					m[j*n2+c] ^= gfMul(scale, m[i*n2+c])
-				}
-			}
-		}
-	}
-	for k := 0; k < n; k++ {
-		for j := 0; j < k; j++ {
-			if m[j*n2+k] != 0 {
-				scale := m[j*n2+k]
-				for c := 0; c < n2; c++ {
-					m[j*n2+c] ^= gfMul(scale, m[k*n2+c])
-				}
-			}
-		}
-	}
-	return nil
-}

+ 0 - 280
vendor/github.com/templexxx/reedsolomon/rs.go

@@ -1,280 +0,0 @@
-/*
-	Reed-Solomon Codes over GF(2^8)
-	Primitive Polynomial:  x^8+x^4+x^3+x^2+1
-	Galois Filed arithmetic using Intel SIMD instructions (AVX2 or SSSE3)
-*/
-
-package reedsolomon
-
-import "errors"
-
-// Encoder implements for Reed-Solomon Encoding/Reconstructing
-type Encoder interface {
-	// Encode multiply generator-matrix with data
-	// len(vects) must be equal with num of data+parity
-	Encode(vects [][]byte) error
-	// Result of reconst will be put into origin position of vects
-	// it means if you lost vects[0], after reconst the vects[0]'s data will be back in vects[0]
-
-	// Reconstruct repair lost data & parity
-	// Set vect nil if lost
-	Reconstruct(vects [][]byte) error
-	// Reconstruct repair lost data
-	// Set vect nil if lost
-	ReconstructData(vects [][]byte) error
-	// ReconstWithPos repair lost data&parity with has&lost vects position
-	// Save bandwidth&disk I/O (cmp with Reconstruct, if the lost is less than num of parity)
-	// As erasure codes, we must know which vect is broken,
-	// so it's necessary to provide such APIs
-	// len(has) must equal num of data vects
-	// Example:
-	// in 3+2, the whole position: [0,1,2,3,4]
-	// if lost vects[0]
-	// the "has" could be [1,2,3] or [1,2,4] or ...
-	// then you must be sure that vects[1] vects[2] vects[3] have correct data (if the "has" is [1,2,3])
-	// the "dLost" will be [0]
-	// ps:
-	// 1. the above lists are in increasing orders  TODO support out-of-order
-	// 2. each vect has same len, don't set it nil
-	// so we don't need to make slice
-	ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error
-	//// ReconstWithPos repair lost data with survived&lost vects position
-	//// Don't need to append position of parity lost into "lost"
-	ReconstDataWithPos(vects [][]byte, has, dLost []int) error
-}
-
-func checkCfg(d, p int) error {
-	if (d <= 0) || (p <= 0) {
-		return errors.New("rs.New: data or parity <= 0")
-	}
-	if d+p >= 256 {
-		return errors.New("rs.New: data+parity >= 256")
-	}
-	return nil
-}
-
-// New create an Encoder (vandermonde matrix as Encoding matrix)
-func New(data, parity int) (enc Encoder, err error) {
-	err = checkCfg(data, parity)
-	if err != nil {
-		return
-	}
-	e, err := genEncMatrixVand(data, parity)
-	if err != nil {
-		return
-	}
-	return newRS(data, parity, e), nil
-}
-
-// NewCauchy create an Encoder (cauchy matrix as Generator Matrix)
-func NewCauchy(data, parity int) (enc Encoder, err error) {
-	err = checkCfg(data, parity)
-	if err != nil {
-		return
-	}
-	e := genEncMatrixCauchy(data, parity)
-	return newRS(data, parity, e), nil
-}
-
-type encBase struct {
-	data   int
-	parity int
-	encode []byte
-	gen    []byte
-}
-
-func checkEnc(d, p int, vs [][]byte) (size int, err error) {
-	total := len(vs)
-	if d+p != total {
-		err = errors.New("rs.checkER: vects not match rs args")
-		return
-	}
-	size = len(vs[0])
-	if size == 0 {
-		err = errors.New("rs.checkER: vects size = 0")
-		return
-	}
-	for i := 1; i < total; i++ {
-		if len(vs[i]) != size {
-			err = errors.New("rs.checkER: vects size mismatch")
-			return
-		}
-	}
-	return
-}
-
-func (e *encBase) Encode(vects [][]byte) (err error) {
-	d := e.data
-	p := e.parity
-	_, err = checkEnc(d, p, vects)
-	if err != nil {
-		return
-	}
-	dv := vects[:d]
-	pv := vects[d:]
-	g := e.gen
-	for i := 0; i < d; i++ {
-		for j := 0; j < p; j++ {
-			if i != 0 {
-				mulVectAdd(g[j*d+i], dv[i], pv[j])
-			} else {
-				mulVect(g[j*d], dv[0], pv[j])
-			}
-		}
-	}
-	return
-}
-
-func mulVect(c byte, a, b []byte) {
-	t := mulTbl[c]
-	for i := 0; i < len(a); i++ {
-		b[i] = t[a[i]]
-	}
-}
-
-func mulVectAdd(c byte, a, b []byte) {
-	t := mulTbl[c]
-	for i := 0; i < len(a); i++ {
-		b[i] ^= t[a[i]]
-	}
-}
-
-func (e *encBase) Reconstruct(vects [][]byte) (err error) {
-	return e.reconstruct(vects, false)
-}
-
-func (e *encBase) ReconstructData(vects [][]byte) (err error) {
-	return e.reconstruct(vects, true)
-}
-
-func (e *encBase) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
-	return e.reconstWithPos(vects, has, dLost, pLost, false)
-}
-
-func (e *encBase) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
-	return e.reconstWithPos(vects, has, dLost, nil, true)
-}
-
-func (e *encBase) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
-	d := e.data
-	em := e.encode
-	dCnt := len(dLost)
-	size := len(vects[has[0]])
-	if dCnt != 0 {
-		vtmp := make([][]byte, d+dCnt)
-		for i, p := range has {
-			vtmp[i] = vects[p]
-		}
-		for i, p := range dLost {
-			if len(vects[p]) == 0 {
-				vects[p] = make([]byte, size)
-			}
-			vtmp[i+d] = vects[p]
-		}
-		matrixbuf := make([]byte, 4*d*d+dCnt*d)
-		m := matrixbuf[:d*d]
-		for i, l := range has {
-			copy(m[i*d:i*d+d], em[l*d:l*d+d])
-		}
-		raw := matrixbuf[d*d : 3*d*d]
-		im := matrixbuf[3*d*d : 4*d*d]
-		err2 := matrix(m).invert(raw, d, im)
-		if err2 != nil {
-			return err2
-		}
-		g := matrixbuf[4*d*d:]
-		for i, l := range dLost {
-			copy(g[i*d:i*d+d], im[l*d:l*d+d])
-		}
-		etmp := &encBase{data: d, parity: dCnt, gen: g}
-		err2 = etmp.Encode(vtmp[:d+dCnt])
-		if err2 != nil {
-			return err2
-		}
-	}
-	if dataOnly {
-		return
-	}
-	pCnt := len(pLost)
-	if pCnt != 0 {
-		vtmp := make([][]byte, d+pCnt)
-		g := make([]byte, pCnt*d)
-		for i, l := range pLost {
-			copy(g[i*d:i*d+d], em[l*d:l*d+d])
-		}
-		for i := 0; i < d; i++ {
-			vtmp[i] = vects[i]
-		}
-		for i, p := range pLost {
-			if len(vects[p]) == 0 {
-				vects[p] = make([]byte, size)
-			}
-			vtmp[i+d] = vects[p]
-		}
-		etmp := &encBase{data: d, parity: pCnt, gen: g}
-		err2 := etmp.Encode(vtmp[:d+pCnt])
-		if err2 != nil {
-			return err2
-		}
-	}
-	return
-}
-
-func (e *encBase) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
-	d := e.data
-	p := e.parity
-	// TODO check more, maybe element in has show in lost & deal with len(has) > d
-	if len(has) != d {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	dCnt := len(dLost)
-	if dCnt > p {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	pCnt := len(pLost)
-	if pCnt > p {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	return e.reconst(vects, has, dLost, pLost, dataOnly)
-}
-
-func (e *encBase) reconstruct(vects [][]byte, dataOnly bool) (err error) {
-	d := e.data
-	p := e.parity
-	t := d + p
-	listBuf := make([]int, t+p)
-	has := listBuf[:d]
-	dLost := listBuf[d:t]
-	pLost := listBuf[t : t+p]
-	hasCnt, dCnt, pCnt := 0, 0, 0
-	for i := 0; i < t; i++ {
-		if vects[i] != nil {
-			if hasCnt < d {
-				has[hasCnt] = i
-				hasCnt++
-			}
-		} else {
-			if i < d {
-				if dCnt < p {
-					dLost[dCnt] = i
-					dCnt++
-				} else {
-					return errors.New("rs.Reconst: not enough vects")
-				}
-			} else {
-				if pCnt < p {
-					pLost[pCnt] = i
-					pCnt++
-				} else {
-					return errors.New("rs.Reconst: not enough vects")
-				}
-			}
-		}
-	}
-	if hasCnt != d {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	dLost = dLost[:dCnt]
-	pLost = pLost[:pCnt]
-	return e.reconst(vects, has, dLost, pLost, dataOnly)
-}

+ 0 - 868
vendor/github.com/templexxx/reedsolomon/rs_amd64.go

@@ -1,868 +0,0 @@
-package reedsolomon
-
-import (
-	"errors"
-	"sync"
-
-	"github.com/templexxx/cpufeat"
-)
-
-// SIMD Instruction Extensions
-const (
-	none = iota
-	avx2
-	ssse3
-)
-
-var extension = none
-
-func init() {
-	getEXT()
-}
-
-func getEXT() {
-	if cpufeat.X86.HasAVX2 {
-		extension = avx2
-		return
-	} else if cpufeat.X86.HasSSSE3 {
-		extension = ssse3
-		return
-	} else {
-		extension = none
-		return
-	}
-}
-
-//go:noescape
-func copy32B(dst, src []byte) // Need SSE2(introduced in 2001)
-
-func initTbl(g matrix, rows, cols int, tbl []byte) {
-	off := 0
-	for i := 0; i < cols; i++ {
-		for j := 0; j < rows; j++ {
-			c := g[j*cols+i]
-			t := lowhighTbl[c][:]
-			copy32B(tbl[off:off+32], t)
-			off += 32
-		}
-	}
-}
-
-// At most 3060 inverse matrix (when data=14, parity=4, calc by mathtool/cntinverse)
-// In practice,  data usually below 12, parity below 5
-func okCache(data, parity int) bool {
-	if data < 15 && parity < 5 { // you can change it, but the data+parity can't be bigger than 32 (tips: see the codes about make inverse matrix)
-		return true
-	}
-	return false
-}
-
-type (
-	encSSSE3 encSIMD
-	encAVX2  encSIMD
-	encSIMD  struct {
-		data   int
-		parity int
-		encode matrix
-		gen    matrix
-		tbl    []byte
-		// inverse matrix cache is design for small vect size ( < 4KB )
-		// it will save time for calculating inverse matrix
-		// but it's not so important for big vect size
-		enableCache  bool
-		inverseCache iCache
-	}
-	iCache struct {
-		sync.RWMutex
-		data map[uint32][]byte
-	}
-)
-
-func newRS(d, p int, em matrix) (enc Encoder) {
-	g := em[d*d:]
-	if extension == none {
-		return &encBase{data: d, parity: p, encode: em, gen: g}
-	}
-	t := make([]byte, d*p*32)
-	initTbl(g, p, d, t)
-	ok := okCache(d, p)
-	if extension == avx2 {
-		e := &encAVX2{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
-			inverseCache: iCache{data: make(map[uint32][]byte)}}
-		return e
-	}
-	e := &encSSSE3{data: d, parity: p, encode: em, gen: g, tbl: t, enableCache: ok,
-		inverseCache: iCache{data: make(map[uint32][]byte)}}
-	return e
-}
-
-// Size of sub-vector
-const unit int = 16 * 1024
-
-func getDo(n int) int {
-	if n < unit {
-		c := n >> 4
-		if c == 0 {
-			return unit
-		}
-		return c << 4
-	}
-	return unit
-}
-
-func (e *encAVX2) Encode(vects [][]byte) (err error) {
-	d := e.data
-	p := e.parity
-	size, err := checkEnc(d, p, vects)
-	if err != nil {
-		return
-	}
-	dv := vects[:d]
-	pv := vects[d:]
-	start, end := 0, 0
-	do := getDo(size)
-	for start < size {
-		end = start + do
-		if end <= size {
-			e.matrixMul(start, end, dv, pv)
-			start = end
-		} else {
-			e.matrixMulRemain(start, size, dv, pv)
-			start = size
-		}
-	}
-	return
-}
-
-//go:noescape
-func mulVectAVX2(tbl, d, p []byte)
-
-//go:noescape
-func mulVectAddAVX2(tbl, d, p []byte)
-
-func (e *encAVX2) matrixMul(start, end int, dv, pv [][]byte) {
-	d := e.data
-	p := e.parity
-	tbl := e.tbl
-	off := 0
-	for i := 0; i < d; i++ {
-		for j := 0; j < p; j++ {
-			t := tbl[off : off+32]
-			if i != 0 {
-				mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
-			} else {
-				mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
-			}
-			off += 32
-		}
-	}
-}
-
-func (e *encAVX2) matrixMulRemain(start, end int, dv, pv [][]byte) {
-	undone := end - start
-	do := (undone >> 4) << 4
-	d := e.data
-	p := e.parity
-	tbl := e.tbl
-	if do >= 16 {
-		end2 := start + do
-		off := 0
-		for i := 0; i < d; i++ {
-			for j := 0; j < p; j++ {
-				t := tbl[off : off+32]
-				if i != 0 {
-					mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
-				} else {
-					mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
-				}
-				off += 32
-			}
-		}
-		start = end
-	}
-	if undone > do {
-		// may recalculate some data, but still improve a lot
-		start2 := end - 16
-		if start2 >= 0 {
-			off := 0
-			for i := 0; i < d; i++ {
-				for j := 0; j < p; j++ {
-					t := tbl[off : off+32]
-					if i != 0 {
-						mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
-					} else {
-						mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
-					}
-					off += 32
-				}
-			}
-		} else {
-			g := e.gen
-			for i := 0; i < d; i++ {
-				for j := 0; j < p; j++ {
-					if i != 0 {
-						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
-					} else {
-						mulVect(g[j*d], dv[0][start:], pv[j][start:])
-					}
-				}
-			}
-		}
-	}
-}
-
-// use generator-matrix but not tbls for encoding
-// it's design for reconstructing
-// for small vects, it cost to much time on initTbl, so drop it
-// and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
-func (e *encAVX2) encodeGen(vects [][]byte) (err error) {
-	d := e.data
-	p := e.parity
-	size, err := checkEnc(d, p, vects)
-	if err != nil {
-		return
-	}
-	dv := vects[:d]
-	pv := vects[d:]
-	start, end := 0, 0
-	do := getDo(size)
-	for start < size {
-		end = start + do
-		if end <= size {
-			e.matrixMulGen(start, end, dv, pv)
-			start = end
-		} else {
-			e.matrixMulRemainGen(start, size, dv, pv)
-			start = size
-		}
-	}
-	return
-}
-
-func (e *encAVX2) matrixMulGen(start, end int, dv, pv [][]byte) {
-	d := e.data
-	p := e.parity
-	g := e.gen
-	for i := 0; i < d; i++ {
-		for j := 0; j < p; j++ {
-			t := lowhighTbl[g[j*d+i]][:]
-			if i != 0 {
-				mulVectAddAVX2(t, dv[i][start:end], pv[j][start:end])
-			} else {
-				mulVectAVX2(t, dv[0][start:end], pv[j][start:end])
-			}
-		}
-	}
-}
-
-func (e *encAVX2) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
-	undone := end - start
-	do := (undone >> 4) << 4
-	d := e.data
-	p := e.parity
-	g := e.gen
-	if do >= 16 {
-		end2 := start + do
-		for i := 0; i < d; i++ {
-			for j := 0; j < p; j++ {
-				t := lowhighTbl[g[j*d+i]][:]
-				if i != 0 {
-					mulVectAddAVX2(t, dv[i][start:end2], pv[j][start:end2])
-				} else {
-					mulVectAVX2(t, dv[0][start:end2], pv[j][start:end2])
-				}
-			}
-		}
-		start = end
-	}
-	if undone > do {
-		start2 := end - 16
-		if start2 >= 0 {
-			for i := 0; i < d; i++ {
-				for j := 0; j < p; j++ {
-					t := lowhighTbl[g[j*d+i]][:]
-					if i != 0 {
-						mulVectAddAVX2(t, dv[i][start2:end], pv[j][start2:end])
-					} else {
-						mulVectAVX2(t, dv[0][start2:end], pv[j][start2:end])
-					}
-				}
-			}
-		} else {
-			for i := 0; i < d; i++ {
-				for j := 0; j < p; j++ {
-					if i != 0 {
-						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
-					} else {
-						mulVect(g[j*d], dv[0][start:], pv[j][start:])
-					}
-				}
-			}
-		}
-	}
-}
-
-func (e *encAVX2) Reconstruct(vects [][]byte) (err error) {
-	return e.reconstruct(vects, false)
-}
-
-func (e *encAVX2) ReconstructData(vects [][]byte) (err error) {
-	return e.reconstruct(vects, true)
-}
-
-func (e *encAVX2) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
-	return e.reconstWithPos(vects, has, dLost, pLost, false)
-}
-
-func (e *encAVX2) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
-	return e.reconstWithPos(vects, has, dLost, nil, true)
-}
-
-func (e *encAVX2) makeGen(has, dLost []int) (gen []byte, err error) {
-	d := e.data
-	em := e.encode
-	cnt := len(dLost)
-	if !e.enableCache {
-		matrixbuf := make([]byte, 4*d*d+cnt*d)
-		m := matrixbuf[:d*d]
-		for i, l := range has {
-			copy(m[i*d:i*d+d], em[l*d:l*d+d])
-		}
-		raw := matrixbuf[d*d : 3*d*d]
-		im := matrixbuf[3*d*d : 4*d*d]
-		err2 := matrix(m).invert(raw, d, im)
-		if err2 != nil {
-			return nil, err2
-		}
-		g := matrixbuf[4*d*d:]
-		for i, l := range dLost {
-			copy(g[i*d:i*d+d], im[l*d:l*d+d])
-		}
-		return g, nil
-	}
-	var ikey uint32
-	for _, p := range has {
-		ikey += 1 << uint8(p)
-	}
-	e.inverseCache.RLock()
-	v, ok := e.inverseCache.data[ikey]
-	if ok {
-		im := v
-		g := make([]byte, cnt*d)
-		for i, l := range dLost {
-			copy(g[i*d:i*d+d], im[l*d:l*d+d])
-		}
-		e.inverseCache.RUnlock()
-		return g, nil
-	}
-	e.inverseCache.RUnlock()
-	matrixbuf := make([]byte, 4*d*d+cnt*d)
-	m := matrixbuf[:d*d]
-	for i, l := range has {
-		copy(m[i*d:i*d+d], em[l*d:l*d+d])
-	}
-	raw := matrixbuf[d*d : 3*d*d]
-	im := matrixbuf[3*d*d : 4*d*d]
-	err2 := matrix(m).invert(raw, d, im)
-	if err2 != nil {
-		return nil, err2
-	}
-	e.inverseCache.Lock()
-	e.inverseCache.data[ikey] = im
-	e.inverseCache.Unlock()
-	g := matrixbuf[4*d*d:]
-	for i, l := range dLost {
-		copy(g[i*d:i*d+d], im[l*d:l*d+d])
-	}
-	return g, nil
-}
-
-func (e *encAVX2) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
-	d := e.data
-	em := e.encode
-	dCnt := len(dLost)
-	size := len(vects[has[0]])
-	if dCnt != 0 {
-		vtmp := make([][]byte, d+dCnt)
-		for i, p := range has {
-			vtmp[i] = vects[p]
-		}
-		for i, p := range dLost {
-			if len(vects[p]) == 0 {
-				vects[p] = make([]byte, size)
-			}
-			vtmp[i+d] = vects[p]
-		}
-		g, err2 := e.makeGen(has, dLost)
-		if err2 != nil {
-			return
-		}
-		etmp := &encAVX2{data: d, parity: dCnt, gen: g}
-		err2 = etmp.encodeGen(vtmp)
-		if err2 != nil {
-			return err2
-		}
-	}
-	if dataOnly {
-		return
-	}
-	pCnt := len(pLost)
-	if pCnt != 0 {
-		g := make([]byte, pCnt*d)
-		for i, l := range pLost {
-			copy(g[i*d:i*d+d], em[l*d:l*d+d])
-		}
-		vtmp := make([][]byte, d+pCnt)
-		for i := 0; i < d; i++ {
-			vtmp[i] = vects[i]
-		}
-		for i, p := range pLost {
-			if len(vects[p]) == 0 {
-				vects[p] = make([]byte, size)
-			}
-			vtmp[i+d] = vects[p]
-		}
-		etmp := &encAVX2{data: d, parity: pCnt, gen: g}
-		err2 := etmp.encodeGen(vtmp)
-		if err2 != nil {
-			return err2
-		}
-	}
-	return
-}
-
-func (e *encAVX2) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
-	d := e.data
-	p := e.parity
-	if len(has) != d {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	dCnt := len(dLost)
-	if dCnt > p {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	pCnt := len(pLost)
-	if pCnt > p {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	return e.reconst(vects, has, dLost, pLost, dataOnly)
-}
-
-func (e *encAVX2) reconstruct(vects [][]byte, dataOnly bool) (err error) {
-	d := e.data
-	p := e.parity
-	t := d + p
-	listBuf := make([]int, t+p)
-	has := listBuf[:d]
-	dLost := listBuf[d:t]
-	pLost := listBuf[t : t+p]
-	hasCnt, dCnt, pCnt := 0, 0, 0
-	for i := 0; i < t; i++ {
-		if vects[i] != nil {
-			if hasCnt < d {
-				has[hasCnt] = i
-				hasCnt++
-			}
-		} else {
-			if i < d {
-				if dCnt < p {
-					dLost[dCnt] = i
-					dCnt++
-				} else {
-					return errors.New("rs.Reconst: not enough vects")
-				}
-			} else {
-				if pCnt < p {
-					pLost[pCnt] = i
-					pCnt++
-				} else {
-					return errors.New("rs.Reconst: not enough vects")
-				}
-			}
-		}
-	}
-	if hasCnt != d {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	dLost = dLost[:dCnt]
-	pLost = pLost[:pCnt]
-	return e.reconst(vects, has, dLost, pLost, dataOnly)
-}
-
-func (e *encSSSE3) Encode(vects [][]byte) (err error) {
-	d := e.data
-	p := e.parity
-	size, err := checkEnc(d, p, vects)
-	if err != nil {
-		return
-	}
-	dv := vects[:d]
-	pv := vects[d:]
-	start, end := 0, 0
-	do := getDo(size)
-	for start < size {
-		end = start + do
-		if end <= size {
-			e.matrixMul(start, end, dv, pv)
-			start = end
-		} else {
-			e.matrixMulRemain(start, size, dv, pv)
-			start = size
-		}
-	}
-	return
-}
-
-//go:noescape
-func mulVectSSSE3(tbl, d, p []byte)
-
-//go:noescape
-func mulVectAddSSSE3(tbl, d, p []byte)
-
-func (e *encSSSE3) matrixMul(start, end int, dv, pv [][]byte) {
-	d := e.data
-	p := e.parity
-	tbl := e.tbl
-	off := 0
-	for i := 0; i < d; i++ {
-		for j := 0; j < p; j++ {
-			t := tbl[off : off+32]
-			if i != 0 {
-				mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
-			} else {
-				mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
-			}
-			off += 32
-		}
-	}
-}
-
-func (e *encSSSE3) matrixMulRemain(start, end int, dv, pv [][]byte) {
-	undone := end - start
-	do := (undone >> 4) << 4
-	d := e.data
-	p := e.parity
-	tbl := e.tbl
-	if do >= 16 {
-		end2 := start + do
-		off := 0
-		for i := 0; i < d; i++ {
-			for j := 0; j < p; j++ {
-				t := tbl[off : off+32]
-				if i != 0 {
-					mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
-				} else {
-					mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
-				}
-				off += 32
-			}
-		}
-		start = end
-	}
-	if undone > do {
-		start2 := end - 16
-		if start2 >= 0 {
-			off := 0
-			for i := 0; i < d; i++ {
-				for j := 0; j < p; j++ {
-					t := tbl[off : off+32]
-					if i != 0 {
-						mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
-					} else {
-						mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
-					}
-					off += 32
-				}
-			}
-		} else {
-			g := e.gen
-			for i := 0; i < d; i++ {
-				for j := 0; j < p; j++ {
-					if i != 0 {
-						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
-					} else {
-						mulVect(g[j*d], dv[0][start:], pv[j][start:])
-					}
-				}
-			}
-		}
-	}
-}
-
-// use generator-matrix but not tbls for encoding
-// it's design for reconstructing
-// for small vects, it cost to much time on initTbl, so drop it
-// and for big vects, the tbls can't impact much, because the cache will be filled with vects' data
-func (e *encSSSE3) encodeGen(vects [][]byte) (err error) {
-	d := e.data
-	p := e.parity
-	size, err := checkEnc(d, p, vects)
-	if err != nil {
-		return
-	}
-	dv := vects[:d]
-	pv := vects[d:]
-	start, end := 0, 0
-	do := getDo(size)
-	for start < size {
-		end = start + do
-		if end <= size {
-			e.matrixMulGen(start, end, dv, pv)
-			start = end
-		} else {
-			e.matrixMulRemainGen(start, size, dv, pv)
-			start = size
-		}
-	}
-	return
-}
-
-func (e *encSSSE3) matrixMulGen(start, end int, dv, pv [][]byte) {
-	d := e.data
-	p := e.parity
-	g := e.gen
-	for i := 0; i < d; i++ {
-		for j := 0; j < p; j++ {
-			t := lowhighTbl[g[j*d+i]][:]
-			if i != 0 {
-				mulVectAddSSSE3(t, dv[i][start:end], pv[j][start:end])
-			} else {
-				mulVectSSSE3(t, dv[0][start:end], pv[j][start:end])
-			}
-		}
-	}
-}
-
-func (e *encSSSE3) matrixMulRemainGen(start, end int, dv, pv [][]byte) {
-	undone := end - start
-	do := (undone >> 4) << 4
-	d := e.data
-	p := e.parity
-	g := e.gen
-	if do >= 16 {
-		end2 := start + do
-		for i := 0; i < d; i++ {
-			for j := 0; j < p; j++ {
-				t := lowhighTbl[g[j*d+i]][:]
-				if i != 0 {
-					mulVectAddSSSE3(t, dv[i][start:end2], pv[j][start:end2])
-				} else {
-					mulVectSSSE3(t, dv[0][start:end2], pv[j][start:end2])
-				}
-			}
-		}
-		start = end
-	}
-	if undone > do {
-		start2 := end - 16
-		if start2 >= 0 {
-			for i := 0; i < d; i++ {
-				for j := 0; j < p; j++ {
-					t := lowhighTbl[g[j*d+i]][:]
-					if i != 0 {
-						mulVectAddSSSE3(t, dv[i][start2:end], pv[j][start2:end])
-					} else {
-						mulVectSSSE3(t, dv[0][start2:end], pv[j][start2:end])
-					}
-				}
-			}
-		} else {
-			for i := 0; i < d; i++ {
-				for j := 0; j < p; j++ {
-					if i != 0 {
-						mulVectAdd(g[j*d+i], dv[i][start:], pv[j][start:])
-					} else {
-						mulVect(g[j*d], dv[0][start:], pv[j][start:])
-					}
-				}
-			}
-		}
-	}
-}
-
-func (e *encSSSE3) Reconstruct(vects [][]byte) (err error) {
-	return e.reconstruct(vects, false)
-}
-
-func (e *encSSSE3) ReconstructData(vects [][]byte) (err error) {
-	return e.reconstruct(vects, true)
-}
-
-func (e *encSSSE3) ReconstWithPos(vects [][]byte, has, dLost, pLost []int) error {
-	return e.reconstWithPos(vects, has, dLost, pLost, false)
-}
-
-func (e *encSSSE3) ReconstDataWithPos(vects [][]byte, has, dLost []int) error {
-	return e.reconstWithPos(vects, has, dLost, nil, true)
-}
-
-func (e *encSSSE3) makeGen(has, dLost []int) (gen []byte, err error) {
-	d := e.data
-	em := e.encode
-	cnt := len(dLost)
-	if !e.enableCache {
-		matrixbuf := make([]byte, 4*d*d+cnt*d)
-		m := matrixbuf[:d*d]
-		for i, l := range has {
-			copy(m[i*d:i*d+d], em[l*d:l*d+d])
-		}
-		raw := matrixbuf[d*d : 3*d*d]
-		im := matrixbuf[3*d*d : 4*d*d]
-		err2 := matrix(m).invert(raw, d, im)
-		if err2 != nil {
-			return nil, err2
-		}
-		g := matrixbuf[4*d*d:]
-		for i, l := range dLost {
-			copy(g[i*d:i*d+d], im[l*d:l*d+d])
-		}
-		return g, nil
-	}
-	var ikey uint32
-	for _, p := range has {
-		ikey += 1 << uint8(p)
-	}
-	e.inverseCache.RLock()
-	v, ok := e.inverseCache.data[ikey]
-	if ok {
-		im := v
-		g := make([]byte, cnt*d)
-		for i, l := range dLost {
-			copy(g[i*d:i*d+d], im[l*d:l*d+d])
-		}
-		e.inverseCache.RUnlock()
-		return g, nil
-	}
-	e.inverseCache.RUnlock()
-	matrixbuf := make([]byte, 4*d*d+cnt*d)
-	m := matrixbuf[:d*d]
-	for i, l := range has {
-		copy(m[i*d:i*d+d], em[l*d:l*d+d])
-	}
-	raw := matrixbuf[d*d : 3*d*d]
-	im := matrixbuf[3*d*d : 4*d*d]
-	err2 := matrix(m).invert(raw, d, im)
-	if err2 != nil {
-		return nil, err2
-	}
-	e.inverseCache.Lock()
-	e.inverseCache.data[ikey] = im
-	e.inverseCache.Unlock()
-	g := matrixbuf[4*d*d:]
-	for i, l := range dLost {
-		copy(g[i*d:i*d+d], im[l*d:l*d+d])
-	}
-	return g, nil
-}
-
-func (e *encSSSE3) reconst(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
-	d := e.data
-	em := e.encode
-	dCnt := len(dLost)
-	size := len(vects[has[0]])
-	if dCnt != 0 {
-		vtmp := make([][]byte, d+dCnt)
-		for i, p := range has {
-			vtmp[i] = vects[p]
-		}
-		for i, p := range dLost {
-			if len(vects[p]) == 0 {
-				vects[p] = make([]byte, size)
-			}
-			vtmp[i+d] = vects[p]
-		}
-		g, err2 := e.makeGen(has, dLost)
-		if err2 != nil {
-			return
-		}
-		etmp := &encSSSE3{data: d, parity: dCnt, gen: g}
-		err2 = etmp.encodeGen(vtmp)
-		if err2 != nil {
-			return err2
-		}
-	}
-	if dataOnly {
-		return
-	}
-	pCnt := len(pLost)
-	if pCnt != 0 {
-		g := make([]byte, pCnt*d)
-		for i, l := range pLost {
-			copy(g[i*d:i*d+d], em[l*d:l*d+d])
-		}
-		vtmp := make([][]byte, d+pCnt)
-		for i := 0; i < d; i++ {
-			vtmp[i] = vects[i]
-		}
-		for i, p := range pLost {
-			if len(vects[p]) == 0 {
-				vects[p] = make([]byte, size)
-			}
-			vtmp[i+d] = vects[p]
-		}
-		etmp := &encSSSE3{data: d, parity: pCnt, gen: g}
-		err2 := etmp.encodeGen(vtmp)
-		if err2 != nil {
-			return err2
-		}
-	}
-	return
-}
-
-func (e *encSSSE3) reconstWithPos(vects [][]byte, has, dLost, pLost []int, dataOnly bool) (err error) {
-	d := e.data
-	p := e.parity
-	if len(has) != d {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	dCnt := len(dLost)
-	if dCnt > p {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	pCnt := len(pLost)
-	if pCnt > p {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	return e.reconst(vects, has, dLost, pLost, dataOnly)
-}
-
-func (e *encSSSE3) reconstruct(vects [][]byte, dataOnly bool) (err error) {
-	d := e.data
-	p := e.parity
-	t := d + p
-	listBuf := make([]int, t+p)
-	has := listBuf[:d]
-	dLost := listBuf[d:t]
-	pLost := listBuf[t : t+p]
-	hasCnt, dCnt, pCnt := 0, 0, 0
-	for i := 0; i < t; i++ {
-		if vects[i] != nil {
-			if hasCnt < d {
-				has[hasCnt] = i
-				hasCnt++
-			}
-		} else {
-			if i < d {
-				if dCnt < p {
-					dLost[dCnt] = i
-					dCnt++
-				} else {
-					return errors.New("rs.Reconst: not enough vects")
-				}
-			} else {
-				if pCnt < p {
-					pLost[pCnt] = i
-					pCnt++
-				} else {
-					return errors.New("rs.Reconst: not enough vects")
-				}
-			}
-		}
-	}
-	if hasCnt != d {
-		return errors.New("rs.Reconst: not enough vects")
-	}
-	dLost = dLost[:dCnt]
-	pLost = pLost[:pCnt]
-	return e.reconst(vects, has, dLost, pLost, dataOnly)
-}

+ 0 - 401
vendor/github.com/templexxx/reedsolomon/rs_amd64.s

@@ -1,401 +0,0 @@
-// Reference: www.ssrc.ucsc.edu/Papers/plank-fast13.pdf
-
-#include "textflag.h"
-
-#define low_tbl Y0
-#define high_tbl Y1
-#define mask Y2
-#define in0  Y3
-#define in1  Y4
-#define in2  Y5
-#define in3  Y6
-#define in4  Y7
-#define in5  Y8
-#define in0_h  Y10
-#define in1_h  Y11
-#define in2_h  Y12
-#define in3_h  Y13
-#define in4_h  Y14
-#define in5_h  Y15
-
-#define in  BX
-#define out DI
-#define len R8
-#define pos R9
-
-#define tmp0 R10
-
-#define low_tblx X0
-#define high_tblx X1
-#define maskx X2
-#define in0x X3
-#define in0_hx X10
-#define tmp0x  X9
-#define tmp1x  X11
-#define tmp2x  X12
-#define tmp3x  X13
-
-
-// func mulVectAVX2(tbl, d, p []byte)
-TEXT ·mulVectAVX2(SB), NOSPLIT, $0
-    MOVQ         i+24(FP), in
-	MOVQ         o+48(FP), out
-	MOVQ         tbl+0(FP), tmp0
-	VMOVDQU      (tmp0), low_tblx
-	VMOVDQU      16(tmp0), high_tblx
-	MOVB         $0x0f, DX
-	LONG         $0x2069e3c4; WORD $0x00d2   // VPINSRB $0x00, EDX, XMM2, XMM2
-	VPBROADCASTB maskx, maskx
-	MOVQ         in_len+32(FP), len
-	TESTQ        $31, len
-	JNZ          one16b
-
-ymm:
-    VINSERTI128  $1, low_tblx, low_tbl, low_tbl
-    VINSERTI128  $1, high_tblx, high_tbl, high_tbl
-    VINSERTI128  $1, maskx, mask, mask
-    TESTQ        $255, len
-    JNZ          not_aligned
-
-// 256bytes/loop
-aligned:
-    MOVQ         $0, pos
-
-loop256b:
-	VMOVDQU (in)(pos*1), in0
-	VPSRLQ  $4, in0, in0_h
-	VPAND   mask, in0_h, in0_h
-	VPAND   mask, in0, in0
-	VPSHUFB in0_h, high_tbl, in0_h
-	VPSHUFB in0, low_tbl, in0
-	VPXOR   in0, in0_h, in0
-	VMOVDQU in0, (out)(pos*1)
-
-    VMOVDQU 32(in)(pos*1), in1
-	VPSRLQ  $4, in1, in1_h
-	VPAND   mask, in1_h, in1_h
-	VPAND   mask, in1, in1
-	VPSHUFB in1_h, high_tbl, in1_h
-	VPSHUFB in1, low_tbl, in1
-	VPXOR   in1, in1_h, in1
-	VMOVDQU in1, 32(out)(pos*1)
-
-    VMOVDQU 64(in)(pos*1), in2
-	VPSRLQ  $4, in2, in2_h
-	VPAND   mask, in2_h, in2_h
-	VPAND   mask, in2, in2
-	VPSHUFB in2_h, high_tbl, in2_h
-	VPSHUFB in2, low_tbl, in2
-	VPXOR   in2, in2_h, in2
-	VMOVDQU in2, 64(out)(pos*1)
-
-    VMOVDQU 96(in)(pos*1), in3
-	VPSRLQ  $4, in3, in3_h
-	VPAND   mask, in3_h, in3_h
-	VPAND   mask, in3, in3
-	VPSHUFB in3_h, high_tbl, in3_h
-	VPSHUFB in3, low_tbl, in3
-	VPXOR   in3, in3_h, in3
-	VMOVDQU in3, 96(out)(pos*1)
-
-    VMOVDQU 128(in)(pos*1), in4
-	VPSRLQ  $4, in4, in4_h
-	VPAND   mask, in4_h, in4_h
-	VPAND   mask, in4, in4
-	VPSHUFB in4_h, high_tbl, in4_h
-	VPSHUFB in4, low_tbl, in4
-	VPXOR   in4, in4_h, in4
-	VMOVDQU in4, 128(out)(pos*1)
-
-    VMOVDQU 160(in)(pos*1), in5
-	VPSRLQ  $4, in5, in5_h
-	VPAND   mask, in5_h, in5_h
-	VPAND   mask, in5, in5
-	VPSHUFB in5_h, high_tbl, in5_h
-	VPSHUFB in5, low_tbl, in5
-	VPXOR   in5, in5_h, in5
-	VMOVDQU in5, 160(out)(pos*1)
-
-    VMOVDQU 192(in)(pos*1), in0
-	VPSRLQ  $4, in0, in0_h
-	VPAND   mask, in0_h, in0_h
-	VPAND   mask, in0, in0
-	VPSHUFB in0_h, high_tbl, in0_h
-	VPSHUFB in0, low_tbl, in0
-	VPXOR   in0, in0_h, in0
-	VMOVDQU in0, 192(out)(pos*1)
-
-    VMOVDQU 224(in)(pos*1), in1
-	VPSRLQ  $4, in1, in1_h
-	VPAND   mask, in1_h, in1_h
-	VPAND   mask, in1, in1
-	VPSHUFB in1_h, high_tbl, in1_h
-	VPSHUFB in1, low_tbl, in1
-	VPXOR   in1, in1_h, in1
-	VMOVDQU in1, 224(out)(pos*1)
-
-	ADDQ    $256, pos
-	CMPQ    len, pos
-	JNE     loop256b
-	VZEROUPPER
-	RET
-
-not_aligned:
-    MOVQ    len, tmp0
-    ANDQ    $255, tmp0
-
-loop32b:
-    VMOVDQU -32(in)(len*1), in0
-	VPSRLQ  $4, in0, in0_h
-	VPAND   mask, in0_h, in0_h
-	VPAND   mask, in0, in0
-	VPSHUFB in0_h, high_tbl, in0_h
-	VPSHUFB in0, low_tbl, in0
-	VPXOR   in0, in0_h, in0
-	VMOVDQU in0, -32(out)(len*1)
-	SUBQ    $32, len
-	SUBQ    $32, tmp0
-	JG      loop32b
-	CMPQ    len, $256
-	JGE     aligned
-	VZEROUPPER
-	RET
-
-one16b:
-    VMOVDQU  -16(in)(len*1), in0x
-    VPSRLQ   $4, in0x, in0_hx
-    VPAND    maskx, in0x, in0x
-    VPAND    maskx, in0_hx, in0_hx
-    VPSHUFB  in0_hx, high_tblx, in0_hx
-    VPSHUFB  in0x, low_tblx, in0x
-    VPXOR    in0x, in0_hx, in0x
-	VMOVDQU  in0x, -16(out)(len*1)
-	SUBQ     $16, len
-	CMPQ     len, $0
-	JNE      ymm
-	RET
-
-// func mulVectAddAVX2(tbl, d, p []byte)
-TEXT ·mulVectAddAVX2(SB), NOSPLIT, $0
-    MOVQ         i+24(FP), in
-	MOVQ         o+48(FP), out
-	MOVQ         tbl+0(FP), tmp0
-	VMOVDQU      (tmp0), low_tblx
-	VMOVDQU      16(tmp0), high_tblx
-	MOVB         $0x0f, DX
-	LONG         $0x2069e3c4; WORD $0x00d2
-	VPBROADCASTB maskx, maskx
-	MOVQ         in_len+32(FP), len
-	TESTQ        $31, len
-	JNZ          one16b
-
-ymm:
-    VINSERTI128  $1, low_tblx, low_tbl, low_tbl
-    VINSERTI128  $1, high_tblx, high_tbl, high_tbl
-    VINSERTI128  $1, maskx, mask, mask
-    TESTQ        $255, len
-    JNZ          not_aligned
-
-aligned:
-    MOVQ         $0, pos
-
-loop256b:
-    VMOVDQU (in)(pos*1), in0
-	VPSRLQ  $4, in0, in0_h
-	VPAND   mask, in0_h, in0_h
-	VPAND   mask, in0, in0
-	VPSHUFB in0_h, high_tbl, in0_h
-	VPSHUFB in0, low_tbl, in0
-	VPXOR   in0, in0_h, in0
-	VPXOR   (out)(pos*1), in0, in0
-	VMOVDQU in0, (out)(pos*1)
-
-    VMOVDQU 32(in)(pos*1), in1
-	VPSRLQ  $4, in1, in1_h
-	VPAND   mask, in1_h, in1_h
-	VPAND   mask, in1, in1
-	VPSHUFB in1_h, high_tbl, in1_h
-	VPSHUFB in1, low_tbl, in1
-	VPXOR   in1, in1_h, in1
-	VPXOR   32(out)(pos*1), in1, in1
-	VMOVDQU in1, 32(out)(pos*1)
-
-    VMOVDQU 64(in)(pos*1), in2
-	VPSRLQ  $4, in2, in2_h
-	VPAND   mask, in2_h, in2_h
-	VPAND   mask, in2, in2
-	VPSHUFB in2_h, high_tbl, in2_h
-	VPSHUFB in2, low_tbl, in2
-	VPXOR   in2, in2_h, in2
-	VPXOR   64(out)(pos*1), in2, in2
-	VMOVDQU in2, 64(out)(pos*1)
-
-    VMOVDQU 96(in)(pos*1), in3
-	VPSRLQ  $4, in3, in3_h
-	VPAND   mask, in3_h, in3_h
-	VPAND   mask, in3, in3
-	VPSHUFB in3_h, high_tbl, in3_h
-	VPSHUFB in3, low_tbl, in3
-	VPXOR   in3, in3_h, in3
-	VPXOR   96(out)(pos*1), in3, in3
-	VMOVDQU in3, 96(out)(pos*1)
-
-    VMOVDQU 128(in)(pos*1), in4
-	VPSRLQ  $4, in4, in4_h
-	VPAND   mask, in4_h, in4_h
-	VPAND   mask, in4, in4
-	VPSHUFB in4_h, high_tbl, in4_h
-	VPSHUFB in4, low_tbl, in4
-	VPXOR   in4, in4_h, in4
-	VPXOR   128(out)(pos*1), in4, in4
-	VMOVDQU in4, 128(out)(pos*1)
-
-    VMOVDQU 160(in)(pos*1), in5
-	VPSRLQ  $4, in5, in5_h
-	VPAND   mask, in5_h, in5_h
-	VPAND   mask, in5, in5
-	VPSHUFB in5_h, high_tbl, in5_h
-	VPSHUFB in5, low_tbl, in5
-	VPXOR   in5, in5_h, in5
-	VPXOR   160(out)(pos*1), in5, in5
-	VMOVDQU in5, 160(out)(pos*1)
-
-    VMOVDQU 192(in)(pos*1), in0
-	VPSRLQ  $4, in0, in0_h
-	VPAND   mask, in0_h, in0_h
-	VPAND   mask, in0, in0
-	VPSHUFB in0_h, high_tbl, in0_h
-	VPSHUFB in0, low_tbl, in0
-	VPXOR   in0, in0_h, in0
-	VPXOR   192(out)(pos*1), in0, in0
-	VMOVDQU in0, 192(out)(pos*1)
-
-    VMOVDQU 224(in)(pos*1), in1
-	VPSRLQ  $4, in1, in1_h
-	VPAND   mask, in1_h, in1_h
-	VPAND   mask, in1, in1
-	VPSHUFB in1_h, high_tbl, in1_h
-	VPSHUFB in1, low_tbl, in1
-	VPXOR   in1, in1_h, in1
-	VPXOR   224(out)(pos*1), in1, in1
-	VMOVDQU in1, 224(out)(pos*1)
-
-	ADDQ    $256, pos
-	CMPQ    len, pos
-	JNE     loop256b
-	VZEROUPPER
-	RET
-
-not_aligned:
-    MOVQ    len, tmp0
-    ANDQ    $255, tmp0
-
-loop32b:
-    VMOVDQU -32(in)(len*1), in0
-	VPSRLQ  $4, in0, in0_h
-	VPAND   mask, in0_h, in0_h
-	VPAND   mask, in0, in0
-	VPSHUFB in0_h, high_tbl, in0_h
-	VPSHUFB in0, low_tbl, in0
-	VPXOR   in0, in0_h, in0
-	VPXOR   -32(out)(len*1), in0, in0
-	VMOVDQU in0, -32(out)(len*1)
-	SUBQ    $32, len
-	SUBQ    $32, tmp0
-	JG      loop32b
-	CMPQ    len, $256
-	JGE     aligned
-	VZEROUPPER
-	RET
-
-one16b:
-    VMOVDQU  -16(in)(len*1), in0x
-    VPSRLQ   $4, in0x, in0_hx
-    VPAND    maskx, in0x, in0x
-    VPAND    maskx, in0_hx, in0_hx
-    VPSHUFB  in0_hx, high_tblx, in0_hx
-    VPSHUFB  in0x, low_tblx, in0x
-    VPXOR    in0x, in0_hx, in0x
-    VPXOR    -16(out)(len*1), in0x, in0x
-	VMOVDQU  in0x, -16(out)(len*1)
-	SUBQ     $16, len
-	CMPQ     len, $0
-	JNE      ymm
-	RET
-
-// func mulVectSSSE3(tbl, d, p []byte)
-TEXT ·mulVectSSSE3(SB), NOSPLIT, $0
-    MOVQ    i+24(FP), in
-	MOVQ    o+48(FP), out
-	MOVQ    tbl+0(FP), tmp0
-	MOVOU   (tmp0), low_tblx
-	MOVOU   16(tmp0), high_tblx
-    MOVB    $15, tmp0
-    MOVQ    tmp0, maskx
-    PXOR    tmp0x, tmp0x
-   	PSHUFB  tmp0x, maskx
-	MOVQ    in_len+32(FP), len
-	SHRQ    $4, len
-
-loop:
-	MOVOU  (in), in0x
-	MOVOU  in0x, in0_hx
-	PSRLQ  $4, in0_hx
-	PAND   maskx, in0x
-	PAND   maskx, in0_hx
-	MOVOU  low_tblx, tmp1x
-	MOVOU  high_tblx, tmp2x
-	PSHUFB in0x, tmp1x
-	PSHUFB in0_hx, tmp2x
-	PXOR   tmp1x, tmp2x
-	MOVOU  tmp2x, (out)
-	ADDQ   $16, in
-	ADDQ   $16, out
-	SUBQ   $1, len
-	JNZ    loop
-	RET
-
-// func mulVectAddSSSE3(tbl, d, p []byte)
-TEXT ·mulVectAddSSSE3(SB), NOSPLIT, $0
-    MOVQ    i+24(FP), in
-	MOVQ    o+48(FP), out
-	MOVQ    tbl+0(FP), tmp0
-	MOVOU   (tmp0), low_tblx
-	MOVOU   16(tmp0), high_tblx
-    MOVB    $15, tmp0
-    MOVQ    tmp0, maskx
-    PXOR    tmp0x, tmp0x
-   	PSHUFB  tmp0x, maskx
-	MOVQ    in_len+32(FP), len
-	SHRQ    $4, len
-
-loop:
-	MOVOU  (in), in0x
-	MOVOU  in0x, in0_hx
-	PSRLQ  $4, in0_hx
-	PAND   maskx, in0x
-	PAND   maskx, in0_hx
-	MOVOU  low_tblx, tmp1x
-	MOVOU  high_tblx, tmp2x
-	PSHUFB in0x, tmp1x
-	PSHUFB in0_hx, tmp2x
-	PXOR   tmp1x, tmp2x
-	MOVOU  (out), tmp3x
-	PXOR   tmp3x, tmp2x
-	MOVOU  tmp2x, (out)
-	ADDQ   $16, in
-	ADDQ   $16, out
-	SUBQ   $1, len
-	JNZ    loop
-	RET
-
-// func copy32B(dst, src []byte)
-TEXT ·copy32B(SB), NOSPLIT, $0
-    MOVQ dst+0(FP), SI
-    MOVQ src+24(FP), DX
-    MOVOU (DX), X0
-    MOVOU 16(DX), X1
-    MOVOU X0, (SI)
-    MOVOU X1, 16(SI)
-    RET
-	

+ 0 - 8
vendor/github.com/templexxx/reedsolomon/rs_other.go

@@ -1,8 +0,0 @@
-// +build !amd64
-
-package reedsolomon
-
-func newRS(d, p int, em matrix) (enc Encoder) {
-	g := em[d*d:]
-	return &encBase{data: d, parity: p, encode: em, gen: g}
-}

File diff suppressed because it is too large
+ 0 - 37
vendor/github.com/templexxx/reedsolomon/tbl.go


+ 119 - 0
vendor/golang.org/x/net/ipv6/batch.go

@@ -0,0 +1,119 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.9
+
+package ipv6
+
+import (
+	"net"
+	"runtime"
+	"syscall"
+
+	"golang.org/x/net/internal/socket"
+)
+
+// BUG(mikio): On Windows, the ReadBatch and WriteBatch methods of
+// PacketConn are not implemented.
+
+// A Message represents an IO message.
+//
+//	type Message struct {
+//		Buffers [][]byte
+//		OOB     []byte
+//		Addr    net.Addr
+//		N       int
+//		NN      int
+//		Flags   int
+//	}
+//
+// The Buffers field represents a list of contiguous buffers, which
+// can be used for vectored IO, for example, putting a header and a
+// payload in each slice.
+// When writing, the Buffers field must contain at least one byte to
+// write.
+// When reading, the Buffers field will always contain a byte to read.
+//
+// The OOB field contains protocol-specific control or miscellaneous
+// ancillary data known as out-of-band data.
+// It can be nil when not required.
+//
+// The Addr field specifies a destination address when writing.
+// It can be nil when the underlying protocol of the endpoint uses
+// connection-oriented communication.
+// After a successful read, it may contain the source address on the
+// received packet.
+//
+// The N field indicates the number of bytes read or written from/to
+// Buffers.
+//
+// The NN field indicates the number of bytes read or written from/to
+// OOB.
+//
+// The Flags field contains protocol-specific information on the
+// received message.
+type Message = socket.Message
+
+// ReadBatch reads a batch of messages.
+//
+// The provided flags is a set of platform-dependent flags, such as
+// syscall.MSG_PEEK.
+//
+// On a successful read it returns the number of messages received, up
+// to len(ms).
+//
+// On Linux, a batch read will be optimized.
+// On other platforms, this method will read only a single message.
+func (c *payloadHandler) ReadBatch(ms []Message, flags int) (int, error) {
+	if !c.ok() {
+		return 0, syscall.EINVAL
+	}
+	switch runtime.GOOS {
+	case "linux":
+		n, err := c.RecvMsgs([]socket.Message(ms), flags)
+		if err != nil {
+			err = &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
+		}
+		return n, err
+	default:
+		n := 1
+		err := c.RecvMsg(&ms[0], flags)
+		if err != nil {
+			n = 0
+			err = &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
+		}
+		return n, err
+	}
+}
+
+// WriteBatch writes a batch of messages.
+//
+// The provided flags is a set of platform-dependent flags, such as
+// syscall.MSG_DONTROUTE.
+//
+// It returns the number of messages written on a successful write.
+//
+// On Linux, a batch write will be optimized.
+// On other platforms, this method will write only a single message.
+func (c *payloadHandler) WriteBatch(ms []Message, flags int) (int, error) {
+	if !c.ok() {
+		return 0, syscall.EINVAL
+	}
+	switch runtime.GOOS {
+	case "linux":
+		n, err := c.SendMsgs([]socket.Message(ms), flags)
+		if err != nil {
+			err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
+		}
+		return n, err
+	default:
+		n := 1
+		err := c.SendMsg(&ms[0], flags)
+		if err != nil {
+			n = 0
+			err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
+		}
+		return n, err
+	}
+}
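
A rough sketch of consuming this batch API through the public golang.org/x/net/ipv6 package follows; the listen address, buffer size, and batch length are illustrative, and the call blocks until at least one datagram arrives.

```go
package main

import (
	"fmt"
	"net"

	"golang.org/x/net/ipv6"
)

func main() {
	c, err := net.ListenPacket("udp6", "[::1]:0")
	if err != nil {
		panic(err)
	}
	defer c.Close()

	p := ipv6.NewPacketConn(c)

	// One receive buffer per message in the batch.
	ms := make([]ipv6.Message, 8)
	for i := range ms {
		ms[i].Buffers = [][]byte{make([]byte, 1500)}
	}

	// On Linux the batch read is optimized into one system call;
	// elsewhere it reads a single message.
	n, err := p.ReadBatch(ms, 0)
	if err != nil {
		panic(err)
	}
	for _, m := range ms[:n] {
		fmt.Printf("%d bytes from %v\n", m.N, m.Addr)
	}
}
```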

+ 187 - 0
vendor/golang.org/x/net/ipv6/control.go

@@ -0,0 +1,187 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"fmt"
+	"net"
+	"sync"
+
+	"golang.org/x/net/internal/iana"
+	"golang.org/x/net/internal/socket"
+)
+
+// Note that RFC 3542 obsoletes RFC 2292, but OS X Snow Leopard and
+// earlier still support only RFC 2292. Please be aware that almost
+// all protocol implementations prohibit using a combination of RFC
+// 2292 and RFC 3542 for practical reasons.
+
+type rawOpt struct {
+	sync.RWMutex
+	cflags ControlFlags
+}
+
+func (c *rawOpt) set(f ControlFlags)        { c.cflags |= f }
+func (c *rawOpt) clear(f ControlFlags)      { c.cflags &^= f }
+func (c *rawOpt) isset(f ControlFlags) bool { return c.cflags&f != 0 }
+
+// A ControlFlags represents per packet basis IP-level socket option
+// control flags.
+type ControlFlags uint
+
+const (
+	FlagTrafficClass ControlFlags = 1 << iota // pass the traffic class on the received packet
+	FlagHopLimit                              // pass the hop limit on the received packet
+	FlagSrc                                   // pass the source address on the received packet
+	FlagDst                                   // pass the destination address on the received packet
+	FlagInterface                             // pass the interface index on the received packet
+	FlagPathMTU                               // pass the path MTU on the received packet path
+)
+
+const flagPacketInfo = FlagDst | FlagInterface
+
+// A ControlMessage represents per packet basis IP-level socket
+// options.
+type ControlMessage struct {
+	// Receiving socket options: SetControlMessage allows you to
+	// receive the options from the protocol stack using the
+	// ReadFrom method of PacketConn.
+	//
+	// Specifying socket options: passing a ControlMessage to the
+	// WriteTo method of PacketConn allows you to send the options
+	// to the protocol stack.
+	//
+	TrafficClass int    // traffic class, must be 1 <= value <= 255 when specifying
+	HopLimit     int    // hop limit, must be 1 <= value <= 255 when specifying
+	Src          net.IP // source address, specifying only
+	Dst          net.IP // destination address, receiving only
+	IfIndex      int    // interface index, must be 1 <= value when specifying
+	NextHop      net.IP // next hop address, specifying only
+	MTU          int    // path MTU, receiving only
+}
+
+func (cm *ControlMessage) String() string {
+	if cm == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("tclass=%#x hoplim=%d src=%v dst=%v ifindex=%d nexthop=%v mtu=%d", cm.TrafficClass, cm.HopLimit, cm.Src, cm.Dst, cm.IfIndex, cm.NextHop, cm.MTU)
+}
+
+// Marshal returns the binary encoding of cm.
+func (cm *ControlMessage) Marshal() []byte {
+	if cm == nil {
+		return nil
+	}
+	var l int
+	tclass := false
+	if ctlOpts[ctlTrafficClass].name > 0 && cm.TrafficClass > 0 {
+		tclass = true
+		l += socket.ControlMessageSpace(ctlOpts[ctlTrafficClass].length)
+	}
+	hoplimit := false
+	if ctlOpts[ctlHopLimit].name > 0 && cm.HopLimit > 0 {
+		hoplimit = true
+		l += socket.ControlMessageSpace(ctlOpts[ctlHopLimit].length)
+	}
+	pktinfo := false
+	if ctlOpts[ctlPacketInfo].name > 0 && (cm.Src.To16() != nil && cm.Src.To4() == nil || cm.IfIndex > 0) {
+		pktinfo = true
+		l += socket.ControlMessageSpace(ctlOpts[ctlPacketInfo].length)
+	}
+	nexthop := false
+	if ctlOpts[ctlNextHop].name > 0 && cm.NextHop.To16() != nil && cm.NextHop.To4() == nil {
+		nexthop = true
+		l += socket.ControlMessageSpace(ctlOpts[ctlNextHop].length)
+	}
+	var b []byte
+	if l > 0 {
+		b = make([]byte, l)
+		bb := b
+		if tclass {
+			bb = ctlOpts[ctlTrafficClass].marshal(bb, cm)
+		}
+		if hoplimit {
+			bb = ctlOpts[ctlHopLimit].marshal(bb, cm)
+		}
+		if pktinfo {
+			bb = ctlOpts[ctlPacketInfo].marshal(bb, cm)
+		}
+		if nexthop {
+			bb = ctlOpts[ctlNextHop].marshal(bb, cm)
+		}
+	}
+	return b
+}
+
+// Parse parses b as a control message and stores the result in cm.
+func (cm *ControlMessage) Parse(b []byte) error {
+	ms, err := socket.ControlMessage(b).Parse()
+	if err != nil {
+		return err
+	}
+	for _, m := range ms {
+		lvl, typ, l, err := m.ParseHeader()
+		if err != nil {
+			return err
+		}
+		if lvl != iana.ProtocolIPv6 {
+			continue
+		}
+		switch {
+		case typ == ctlOpts[ctlTrafficClass].name && l >= ctlOpts[ctlTrafficClass].length:
+			ctlOpts[ctlTrafficClass].parse(cm, m.Data(l))
+		case typ == ctlOpts[ctlHopLimit].name && l >= ctlOpts[ctlHopLimit].length:
+			ctlOpts[ctlHopLimit].parse(cm, m.Data(l))
+		case typ == ctlOpts[ctlPacketInfo].name && l >= ctlOpts[ctlPacketInfo].length:
+			ctlOpts[ctlPacketInfo].parse(cm, m.Data(l))
+		case typ == ctlOpts[ctlPathMTU].name && l >= ctlOpts[ctlPathMTU].length:
+			ctlOpts[ctlPathMTU].parse(cm, m.Data(l))
+		}
+	}
+	return nil
+}
+
+// NewControlMessage returns a new control message.
+//
+// The returned message is large enough for options specified by cf.
+func NewControlMessage(cf ControlFlags) []byte {
+	opt := rawOpt{cflags: cf}
+	var l int
+	if opt.isset(FlagTrafficClass) && ctlOpts[ctlTrafficClass].name > 0 {
+		l += socket.ControlMessageSpace(ctlOpts[ctlTrafficClass].length)
+	}
+	if opt.isset(FlagHopLimit) && ctlOpts[ctlHopLimit].name > 0 {
+		l += socket.ControlMessageSpace(ctlOpts[ctlHopLimit].length)
+	}
+	if opt.isset(flagPacketInfo) && ctlOpts[ctlPacketInfo].name > 0 {
+		l += socket.ControlMessageSpace(ctlOpts[ctlPacketInfo].length)
+	}
+	if opt.isset(FlagPathMTU) && ctlOpts[ctlPathMTU].name > 0 {
+		l += socket.ControlMessageSpace(ctlOpts[ctlPathMTU].length)
+	}
+	var b []byte
+	if l > 0 {
+		b = make([]byte, l)
+	}
+	return b
+}
+
+// Ancillary data socket options
+const (
+	ctlTrafficClass = iota // header field
+	ctlHopLimit            // header field
+	ctlPacketInfo          // inbound or outbound packet path
+	ctlNextHop             // nexthop
+	ctlPathMTU             // path mtu
+	ctlMax
+)
+
+// A ctlOpt represents a binding for ancillary data socket option.
+type ctlOpt struct {
+	name    int // option name, must be equal or greater than 1
+	length  int // option length
+	marshal func([]byte, *ControlMessage) []byte
+	parse   func(*ControlMessage, []byte)
+}
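
As a minimal sketch of the Marshal and Parse methods above (not taken from the package documentation): it round-trips a few per packet options through socket ancillary data. It assumes a Unix-like platform where the RFC 3542 options are available; elsewhere Marshal may return nil and the parsed values simply stay zero. The option values themselves are arbitrary.

	package main

	import (
		"log"

		"golang.org/x/net/ipv6"
	)

	func main() {
		// Encode per packet options into socket ancillary data, then
		// decode that data again.
		wcm := ipv6.ControlMessage{TrafficClass: 0xe0, HopLimit: 1, IfIndex: 1}
		oob := wcm.Marshal()
		var rcm ipv6.ControlMessage
		if err := rcm.Parse(oob); err != nil {
			log.Fatal(err)
		}
		log.Printf("tclass=%#x hoplim=%d ifindex=%d", rcm.TrafficClass, rcm.HopLimit, rcm.IfIndex)
	}

This is roughly what the package does internally around sendmsg and recvmsg when a ControlMessage is passed to WriteTo or requested via SetControlMessage.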

+ 48 - 0
vendor/golang.org/x/net/ipv6/control_rfc2292_unix.go

@@ -0,0 +1,48 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin
+
+package ipv6
+
+import (
+	"unsafe"
+
+	"golang.org/x/net/internal/iana"
+	"golang.org/x/net/internal/socket"
+)
+
+func marshal2292HopLimit(b []byte, cm *ControlMessage) []byte {
+	m := socket.ControlMessage(b)
+	m.MarshalHeader(iana.ProtocolIPv6, sysIPV6_2292HOPLIMIT, 4)
+	if cm != nil {
+		socket.NativeEndian.PutUint32(m.Data(4), uint32(cm.HopLimit))
+	}
+	return m.Next(4)
+}
+
+func marshal2292PacketInfo(b []byte, cm *ControlMessage) []byte {
+	m := socket.ControlMessage(b)
+	m.MarshalHeader(iana.ProtocolIPv6, sysIPV6_2292PKTINFO, sizeofInet6Pktinfo)
+	if cm != nil {
+		pi := (*inet6Pktinfo)(unsafe.Pointer(&m.Data(sizeofInet6Pktinfo)[0]))
+		if ip := cm.Src.To16(); ip != nil && ip.To4() == nil {
+			copy(pi.Addr[:], ip)
+		}
+		if cm.IfIndex > 0 {
+			pi.setIfindex(cm.IfIndex)
+		}
+	}
+	return m.Next(sizeofInet6Pktinfo)
+}
+
+func marshal2292NextHop(b []byte, cm *ControlMessage) []byte {
+	m := socket.ControlMessage(b)
+	m.MarshalHeader(iana.ProtocolIPv6, sysIPV6_2292NEXTHOP, sizeofSockaddrInet6)
+	if cm != nil {
+		sa := (*sockaddrInet6)(unsafe.Pointer(&m.Data(sizeofSockaddrInet6)[0]))
+		sa.setSockaddr(cm.NextHop, cm.IfIndex)
+	}
+	return m.Next(sizeofSockaddrInet6)
+}

+ 94 - 0
vendor/golang.org/x/net/ipv6/control_rfc3542_unix.go

@@ -0,0 +1,94 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package ipv6
+
+import (
+	"net"
+	"unsafe"
+
+	"golang.org/x/net/internal/iana"
+	"golang.org/x/net/internal/socket"
+)
+
+func marshalTrafficClass(b []byte, cm *ControlMessage) []byte {
+	m := socket.ControlMessage(b)
+	m.MarshalHeader(iana.ProtocolIPv6, sysIPV6_TCLASS, 4)
+	if cm != nil {
+		socket.NativeEndian.PutUint32(m.Data(4), uint32(cm.TrafficClass))
+	}
+	return m.Next(4)
+}
+
+func parseTrafficClass(cm *ControlMessage, b []byte) {
+	cm.TrafficClass = int(socket.NativeEndian.Uint32(b[:4]))
+}
+
+func marshalHopLimit(b []byte, cm *ControlMessage) []byte {
+	m := socket.ControlMessage(b)
+	m.MarshalHeader(iana.ProtocolIPv6, sysIPV6_HOPLIMIT, 4)
+	if cm != nil {
+		socket.NativeEndian.PutUint32(m.Data(4), uint32(cm.HopLimit))
+	}
+	return m.Next(4)
+}
+
+func parseHopLimit(cm *ControlMessage, b []byte) {
+	cm.HopLimit = int(socket.NativeEndian.Uint32(b[:4]))
+}
+
+func marshalPacketInfo(b []byte, cm *ControlMessage) []byte {
+	m := socket.ControlMessage(b)
+	m.MarshalHeader(iana.ProtocolIPv6, sysIPV6_PKTINFO, sizeofInet6Pktinfo)
+	if cm != nil {
+		pi := (*inet6Pktinfo)(unsafe.Pointer(&m.Data(sizeofInet6Pktinfo)[0]))
+		if ip := cm.Src.To16(); ip != nil && ip.To4() == nil {
+			copy(pi.Addr[:], ip)
+		}
+		if cm.IfIndex > 0 {
+			pi.setIfindex(cm.IfIndex)
+		}
+	}
+	return m.Next(sizeofInet6Pktinfo)
+}
+
+func parsePacketInfo(cm *ControlMessage, b []byte) {
+	pi := (*inet6Pktinfo)(unsafe.Pointer(&b[0]))
+	if len(cm.Dst) < net.IPv6len {
+		cm.Dst = make(net.IP, net.IPv6len)
+	}
+	copy(cm.Dst, pi.Addr[:])
+	cm.IfIndex = int(pi.Ifindex)
+}
+
+func marshalNextHop(b []byte, cm *ControlMessage) []byte {
+	m := socket.ControlMessage(b)
+	m.MarshalHeader(iana.ProtocolIPv6, sysIPV6_NEXTHOP, sizeofSockaddrInet6)
+	if cm != nil {
+		sa := (*sockaddrInet6)(unsafe.Pointer(&m.Data(sizeofSockaddrInet6)[0]))
+		sa.setSockaddr(cm.NextHop, cm.IfIndex)
+	}
+	return m.Next(sizeofSockaddrInet6)
+}
+
+func parseNextHop(cm *ControlMessage, b []byte) {
+}
+
+func marshalPathMTU(b []byte, cm *ControlMessage) []byte {
+	m := socket.ControlMessage(b)
+	m.MarshalHeader(iana.ProtocolIPv6, sysIPV6_PATHMTU, sizeofIPv6Mtuinfo)
+	return m.Next(sizeofIPv6Mtuinfo)
+}
+
+func parsePathMTU(cm *ControlMessage, b []byte) {
+	mi := (*ipv6Mtuinfo)(unsafe.Pointer(&b[0]))
+	if len(cm.Dst) < net.IPv6len {
+		cm.Dst = make(net.IP, net.IPv6len)
+	}
+	copy(cm.Dst, mi.Addr.Addr[:])
+	cm.IfIndex = int(mi.Addr.Scope_id)
+	cm.MTU = int(mi.Mtu)
+}

+ 13 - 0
vendor/golang.org/x/net/ipv6/control_stub.go

@@ -0,0 +1,13 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows
+
+package ipv6
+
+import "golang.org/x/net/internal/socket"
+
+func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
+	return errOpNoSupport
+}

+ 55 - 0
vendor/golang.org/x/net/ipv6/control_unix.go

@@ -0,0 +1,55 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package ipv6
+
+import "golang.org/x/net/internal/socket"
+
+func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
+	opt.Lock()
+	defer opt.Unlock()
+	if so, ok := sockOpts[ssoReceiveTrafficClass]; ok && cf&FlagTrafficClass != 0 {
+		if err := so.SetInt(c, boolint(on)); err != nil {
+			return err
+		}
+		if on {
+			opt.set(FlagTrafficClass)
+		} else {
+			opt.clear(FlagTrafficClass)
+		}
+	}
+	if so, ok := sockOpts[ssoReceiveHopLimit]; ok && cf&FlagHopLimit != 0 {
+		if err := so.SetInt(c, boolint(on)); err != nil {
+			return err
+		}
+		if on {
+			opt.set(FlagHopLimit)
+		} else {
+			opt.clear(FlagHopLimit)
+		}
+	}
+	if so, ok := sockOpts[ssoReceivePacketInfo]; ok && cf&flagPacketInfo != 0 {
+		if err := so.SetInt(c, boolint(on)); err != nil {
+			return err
+		}
+		if on {
+			opt.set(cf & flagPacketInfo)
+		} else {
+			opt.clear(cf & flagPacketInfo)
+		}
+	}
+	if so, ok := sockOpts[ssoReceivePathMTU]; ok && cf&FlagPathMTU != 0 {
+		if err := so.SetInt(c, boolint(on)); err != nil {
+			return err
+		}
+		if on {
+			opt.set(FlagPathMTU)
+		} else {
+			opt.clear(FlagPathMTU)
+		}
+	}
+	return nil
+}

+ 16 - 0
vendor/golang.org/x/net/ipv6/control_windows.go

@@ -0,0 +1,16 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"syscall"
+
+	"golang.org/x/net/internal/socket"
+)
+
+func setControlMessage(c *socket.Conn, opt *rawOpt, cf ControlFlags, on bool) error {
+	// TODO(mikio): implement this
+	return syscall.EWINDOWS
+}

+ 112 - 0
vendor/golang.org/x/net/ipv6/defs_darwin.go

@@ -0,0 +1,112 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// +godefs map struct_in6_addr [16]byte /* in6_addr */
+
+package ipv6
+
+/*
+#define __APPLE_USE_RFC_3542
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+*/
+import "C"
+
+const (
+	sysIPV6_UNICAST_HOPS   = C.IPV6_UNICAST_HOPS
+	sysIPV6_MULTICAST_IF   = C.IPV6_MULTICAST_IF
+	sysIPV6_MULTICAST_HOPS = C.IPV6_MULTICAST_HOPS
+	sysIPV6_MULTICAST_LOOP = C.IPV6_MULTICAST_LOOP
+	sysIPV6_JOIN_GROUP     = C.IPV6_JOIN_GROUP
+	sysIPV6_LEAVE_GROUP    = C.IPV6_LEAVE_GROUP
+
+	sysIPV6_PORTRANGE    = C.IPV6_PORTRANGE
+	sysICMP6_FILTER      = C.ICMP6_FILTER
+	sysIPV6_2292PKTINFO  = C.IPV6_2292PKTINFO
+	sysIPV6_2292HOPLIMIT = C.IPV6_2292HOPLIMIT
+	sysIPV6_2292NEXTHOP  = C.IPV6_2292NEXTHOP
+	sysIPV6_2292HOPOPTS  = C.IPV6_2292HOPOPTS
+	sysIPV6_2292DSTOPTS  = C.IPV6_2292DSTOPTS
+	sysIPV6_2292RTHDR    = C.IPV6_2292RTHDR
+
+	sysIPV6_2292PKTOPTIONS = C.IPV6_2292PKTOPTIONS
+
+	sysIPV6_CHECKSUM = C.IPV6_CHECKSUM
+	sysIPV6_V6ONLY   = C.IPV6_V6ONLY
+
+	sysIPV6_IPSEC_POLICY = C.IPV6_IPSEC_POLICY
+
+	sysIPV6_RECVTCLASS = C.IPV6_RECVTCLASS
+	sysIPV6_TCLASS     = C.IPV6_TCLASS
+
+	sysIPV6_RTHDRDSTOPTS = C.IPV6_RTHDRDSTOPTS
+
+	sysIPV6_RECVPKTINFO = C.IPV6_RECVPKTINFO
+
+	sysIPV6_RECVHOPLIMIT = C.IPV6_RECVHOPLIMIT
+	sysIPV6_RECVRTHDR    = C.IPV6_RECVRTHDR
+	sysIPV6_RECVHOPOPTS  = C.IPV6_RECVHOPOPTS
+	sysIPV6_RECVDSTOPTS  = C.IPV6_RECVDSTOPTS
+
+	sysIPV6_USE_MIN_MTU = C.IPV6_USE_MIN_MTU
+	sysIPV6_RECVPATHMTU = C.IPV6_RECVPATHMTU
+
+	sysIPV6_PATHMTU = C.IPV6_PATHMTU
+
+	sysIPV6_PKTINFO  = C.IPV6_PKTINFO
+	sysIPV6_HOPLIMIT = C.IPV6_HOPLIMIT
+	sysIPV6_NEXTHOP  = C.IPV6_NEXTHOP
+	sysIPV6_HOPOPTS  = C.IPV6_HOPOPTS
+	sysIPV6_DSTOPTS  = C.IPV6_DSTOPTS
+	sysIPV6_RTHDR    = C.IPV6_RTHDR
+
+	sysIPV6_AUTOFLOWLABEL = C.IPV6_AUTOFLOWLABEL
+
+	sysIPV6_DONTFRAG = C.IPV6_DONTFRAG
+
+	sysIPV6_PREFER_TEMPADDR = C.IPV6_PREFER_TEMPADDR
+
+	sysIPV6_MSFILTER            = C.IPV6_MSFILTER
+	sysMCAST_JOIN_GROUP         = C.MCAST_JOIN_GROUP
+	sysMCAST_LEAVE_GROUP        = C.MCAST_LEAVE_GROUP
+	sysMCAST_JOIN_SOURCE_GROUP  = C.MCAST_JOIN_SOURCE_GROUP
+	sysMCAST_LEAVE_SOURCE_GROUP = C.MCAST_LEAVE_SOURCE_GROUP
+	sysMCAST_BLOCK_SOURCE       = C.MCAST_BLOCK_SOURCE
+	sysMCAST_UNBLOCK_SOURCE     = C.MCAST_UNBLOCK_SOURCE
+
+	sysIPV6_BOUND_IF = C.IPV6_BOUND_IF
+
+	sysIPV6_PORTRANGE_DEFAULT = C.IPV6_PORTRANGE_DEFAULT
+	sysIPV6_PORTRANGE_HIGH    = C.IPV6_PORTRANGE_HIGH
+	sysIPV6_PORTRANGE_LOW     = C.IPV6_PORTRANGE_LOW
+
+	sizeofSockaddrStorage = C.sizeof_struct_sockaddr_storage
+	sizeofSockaddrInet6   = C.sizeof_struct_sockaddr_in6
+	sizeofInet6Pktinfo    = C.sizeof_struct_in6_pktinfo
+	sizeofIPv6Mtuinfo     = C.sizeof_struct_ip6_mtuinfo
+
+	sizeofIPv6Mreq       = C.sizeof_struct_ipv6_mreq
+	sizeofGroupReq       = C.sizeof_struct_group_req
+	sizeofGroupSourceReq = C.sizeof_struct_group_source_req
+
+	sizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
+)
+
+type sockaddrStorage C.struct_sockaddr_storage
+
+type sockaddrInet6 C.struct_sockaddr_in6
+
+type inet6Pktinfo C.struct_in6_pktinfo
+
+type ipv6Mtuinfo C.struct_ip6_mtuinfo
+
+type ipv6Mreq C.struct_ipv6_mreq
+
+type icmpv6Filter C.struct_icmp6_filter
+
+type groupReq C.struct_group_req
+
+type groupSourceReq C.struct_group_source_req

+ 84 - 0
vendor/golang.org/x/net/ipv6/defs_dragonfly.go

@@ -0,0 +1,84 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// +godefs map struct_in6_addr [16]byte /* in6_addr */
+
+package ipv6
+
+/*
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+*/
+import "C"
+
+const (
+	sysIPV6_UNICAST_HOPS   = C.IPV6_UNICAST_HOPS
+	sysIPV6_MULTICAST_IF   = C.IPV6_MULTICAST_IF
+	sysIPV6_MULTICAST_HOPS = C.IPV6_MULTICAST_HOPS
+	sysIPV6_MULTICAST_LOOP = C.IPV6_MULTICAST_LOOP
+	sysIPV6_JOIN_GROUP     = C.IPV6_JOIN_GROUP
+	sysIPV6_LEAVE_GROUP    = C.IPV6_LEAVE_GROUP
+	sysIPV6_PORTRANGE      = C.IPV6_PORTRANGE
+	sysICMP6_FILTER        = C.ICMP6_FILTER
+
+	sysIPV6_CHECKSUM = C.IPV6_CHECKSUM
+	sysIPV6_V6ONLY   = C.IPV6_V6ONLY
+
+	sysIPV6_IPSEC_POLICY = C.IPV6_IPSEC_POLICY
+
+	sysIPV6_RTHDRDSTOPTS = C.IPV6_RTHDRDSTOPTS
+	sysIPV6_RECVPKTINFO  = C.IPV6_RECVPKTINFO
+	sysIPV6_RECVHOPLIMIT = C.IPV6_RECVHOPLIMIT
+	sysIPV6_RECVRTHDR    = C.IPV6_RECVRTHDR
+	sysIPV6_RECVHOPOPTS  = C.IPV6_RECVHOPOPTS
+	sysIPV6_RECVDSTOPTS  = C.IPV6_RECVDSTOPTS
+
+	sysIPV6_USE_MIN_MTU = C.IPV6_USE_MIN_MTU
+	sysIPV6_RECVPATHMTU = C.IPV6_RECVPATHMTU
+
+	sysIPV6_PATHMTU = C.IPV6_PATHMTU
+
+	sysIPV6_PKTINFO  = C.IPV6_PKTINFO
+	sysIPV6_HOPLIMIT = C.IPV6_HOPLIMIT
+	sysIPV6_NEXTHOP  = C.IPV6_NEXTHOP
+	sysIPV6_HOPOPTS  = C.IPV6_HOPOPTS
+	sysIPV6_DSTOPTS  = C.IPV6_DSTOPTS
+	sysIPV6_RTHDR    = C.IPV6_RTHDR
+
+	sysIPV6_RECVTCLASS = C.IPV6_RECVTCLASS
+
+	sysIPV6_AUTOFLOWLABEL = C.IPV6_AUTOFLOWLABEL
+
+	sysIPV6_TCLASS   = C.IPV6_TCLASS
+	sysIPV6_DONTFRAG = C.IPV6_DONTFRAG
+
+	sysIPV6_PREFER_TEMPADDR = C.IPV6_PREFER_TEMPADDR
+
+	sysIPV6_PORTRANGE_DEFAULT = C.IPV6_PORTRANGE_DEFAULT
+	sysIPV6_PORTRANGE_HIGH    = C.IPV6_PORTRANGE_HIGH
+	sysIPV6_PORTRANGE_LOW     = C.IPV6_PORTRANGE_LOW
+
+	sizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
+	sizeofInet6Pktinfo  = C.sizeof_struct_in6_pktinfo
+	sizeofIPv6Mtuinfo   = C.sizeof_struct_ip6_mtuinfo
+
+	sizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
+
+	sizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
+)
+
+type sockaddrInet6 C.struct_sockaddr_in6
+
+type inet6Pktinfo C.struct_in6_pktinfo
+
+type ipv6Mtuinfo C.struct_ip6_mtuinfo
+
+type ipv6Mreq C.struct_ipv6_mreq
+
+type icmpv6Filter C.struct_icmp6_filter

+ 105 - 0
vendor/golang.org/x/net/ipv6/defs_freebsd.go

@@ -0,0 +1,105 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// +godefs map struct_in6_addr [16]byte /* in6_addr */
+
+package ipv6
+
+/*
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+*/
+import "C"
+
+const (
+	sysIPV6_UNICAST_HOPS   = C.IPV6_UNICAST_HOPS
+	sysIPV6_MULTICAST_IF   = C.IPV6_MULTICAST_IF
+	sysIPV6_MULTICAST_HOPS = C.IPV6_MULTICAST_HOPS
+	sysIPV6_MULTICAST_LOOP = C.IPV6_MULTICAST_LOOP
+	sysIPV6_JOIN_GROUP     = C.IPV6_JOIN_GROUP
+	sysIPV6_LEAVE_GROUP    = C.IPV6_LEAVE_GROUP
+	sysIPV6_PORTRANGE      = C.IPV6_PORTRANGE
+	sysICMP6_FILTER        = C.ICMP6_FILTER
+
+	sysIPV6_CHECKSUM = C.IPV6_CHECKSUM
+	sysIPV6_V6ONLY   = C.IPV6_V6ONLY
+
+	sysIPV6_IPSEC_POLICY = C.IPV6_IPSEC_POLICY
+
+	sysIPV6_RTHDRDSTOPTS = C.IPV6_RTHDRDSTOPTS
+
+	sysIPV6_RECVPKTINFO  = C.IPV6_RECVPKTINFO
+	sysIPV6_RECVHOPLIMIT = C.IPV6_RECVHOPLIMIT
+	sysIPV6_RECVRTHDR    = C.IPV6_RECVRTHDR
+	sysIPV6_RECVHOPOPTS  = C.IPV6_RECVHOPOPTS
+	sysIPV6_RECVDSTOPTS  = C.IPV6_RECVDSTOPTS
+
+	sysIPV6_USE_MIN_MTU = C.IPV6_USE_MIN_MTU
+	sysIPV6_RECVPATHMTU = C.IPV6_RECVPATHMTU
+
+	sysIPV6_PATHMTU = C.IPV6_PATHMTU
+
+	sysIPV6_PKTINFO  = C.IPV6_PKTINFO
+	sysIPV6_HOPLIMIT = C.IPV6_HOPLIMIT
+	sysIPV6_NEXTHOP  = C.IPV6_NEXTHOP
+	sysIPV6_HOPOPTS  = C.IPV6_HOPOPTS
+	sysIPV6_DSTOPTS  = C.IPV6_DSTOPTS
+	sysIPV6_RTHDR    = C.IPV6_RTHDR
+
+	sysIPV6_RECVTCLASS = C.IPV6_RECVTCLASS
+
+	sysIPV6_AUTOFLOWLABEL = C.IPV6_AUTOFLOWLABEL
+
+	sysIPV6_TCLASS   = C.IPV6_TCLASS
+	sysIPV6_DONTFRAG = C.IPV6_DONTFRAG
+
+	sysIPV6_PREFER_TEMPADDR = C.IPV6_PREFER_TEMPADDR
+
+	sysIPV6_BINDANY = C.IPV6_BINDANY
+
+	sysIPV6_MSFILTER = C.IPV6_MSFILTER
+
+	sysMCAST_JOIN_GROUP         = C.MCAST_JOIN_GROUP
+	sysMCAST_LEAVE_GROUP        = C.MCAST_LEAVE_GROUP
+	sysMCAST_JOIN_SOURCE_GROUP  = C.MCAST_JOIN_SOURCE_GROUP
+	sysMCAST_LEAVE_SOURCE_GROUP = C.MCAST_LEAVE_SOURCE_GROUP
+	sysMCAST_BLOCK_SOURCE       = C.MCAST_BLOCK_SOURCE
+	sysMCAST_UNBLOCK_SOURCE     = C.MCAST_UNBLOCK_SOURCE
+
+	sysIPV6_PORTRANGE_DEFAULT = C.IPV6_PORTRANGE_DEFAULT
+	sysIPV6_PORTRANGE_HIGH    = C.IPV6_PORTRANGE_HIGH
+	sysIPV6_PORTRANGE_LOW     = C.IPV6_PORTRANGE_LOW
+
+	sizeofSockaddrStorage = C.sizeof_struct_sockaddr_storage
+	sizeofSockaddrInet6   = C.sizeof_struct_sockaddr_in6
+	sizeofInet6Pktinfo    = C.sizeof_struct_in6_pktinfo
+	sizeofIPv6Mtuinfo     = C.sizeof_struct_ip6_mtuinfo
+
+	sizeofIPv6Mreq       = C.sizeof_struct_ipv6_mreq
+	sizeofGroupReq       = C.sizeof_struct_group_req
+	sizeofGroupSourceReq = C.sizeof_struct_group_source_req
+
+	sizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
+)
+
+type sockaddrStorage C.struct_sockaddr_storage
+
+type sockaddrInet6 C.struct_sockaddr_in6
+
+type inet6Pktinfo C.struct_in6_pktinfo
+
+type ipv6Mtuinfo C.struct_ip6_mtuinfo
+
+type ipv6Mreq C.struct_ipv6_mreq
+
+type groupReq C.struct_group_req
+
+type groupSourceReq C.struct_group_source_req
+
+type icmpv6Filter C.struct_icmp6_filter

+ 147 - 0
vendor/golang.org/x/net/ipv6/defs_linux.go

@@ -0,0 +1,147 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// +godefs map struct_in6_addr [16]byte /* in6_addr */
+
+package ipv6
+
+/*
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/filter.h>
+#include <sys/socket.h>
+*/
+import "C"
+
+const (
+	sysIPV6_ADDRFORM       = C.IPV6_ADDRFORM
+	sysIPV6_2292PKTINFO    = C.IPV6_2292PKTINFO
+	sysIPV6_2292HOPOPTS    = C.IPV6_2292HOPOPTS
+	sysIPV6_2292DSTOPTS    = C.IPV6_2292DSTOPTS
+	sysIPV6_2292RTHDR      = C.IPV6_2292RTHDR
+	sysIPV6_2292PKTOPTIONS = C.IPV6_2292PKTOPTIONS
+	sysIPV6_CHECKSUM       = C.IPV6_CHECKSUM
+	sysIPV6_2292HOPLIMIT   = C.IPV6_2292HOPLIMIT
+	sysIPV6_NEXTHOP        = C.IPV6_NEXTHOP
+	sysIPV6_FLOWINFO       = C.IPV6_FLOWINFO
+
+	sysIPV6_UNICAST_HOPS        = C.IPV6_UNICAST_HOPS
+	sysIPV6_MULTICAST_IF        = C.IPV6_MULTICAST_IF
+	sysIPV6_MULTICAST_HOPS      = C.IPV6_MULTICAST_HOPS
+	sysIPV6_MULTICAST_LOOP      = C.IPV6_MULTICAST_LOOP
+	sysIPV6_ADD_MEMBERSHIP      = C.IPV6_ADD_MEMBERSHIP
+	sysIPV6_DROP_MEMBERSHIP     = C.IPV6_DROP_MEMBERSHIP
+	sysMCAST_JOIN_GROUP         = C.MCAST_JOIN_GROUP
+	sysMCAST_LEAVE_GROUP        = C.MCAST_LEAVE_GROUP
+	sysMCAST_JOIN_SOURCE_GROUP  = C.MCAST_JOIN_SOURCE_GROUP
+	sysMCAST_LEAVE_SOURCE_GROUP = C.MCAST_LEAVE_SOURCE_GROUP
+	sysMCAST_BLOCK_SOURCE       = C.MCAST_BLOCK_SOURCE
+	sysMCAST_UNBLOCK_SOURCE     = C.MCAST_UNBLOCK_SOURCE
+	sysMCAST_MSFILTER           = C.MCAST_MSFILTER
+	sysIPV6_ROUTER_ALERT        = C.IPV6_ROUTER_ALERT
+	sysIPV6_MTU_DISCOVER        = C.IPV6_MTU_DISCOVER
+	sysIPV6_MTU                 = C.IPV6_MTU
+	sysIPV6_RECVERR             = C.IPV6_RECVERR
+	sysIPV6_V6ONLY              = C.IPV6_V6ONLY
+	sysIPV6_JOIN_ANYCAST        = C.IPV6_JOIN_ANYCAST
+	sysIPV6_LEAVE_ANYCAST       = C.IPV6_LEAVE_ANYCAST
+
+	//sysIPV6_PMTUDISC_DONT      = C.IPV6_PMTUDISC_DONT
+	//sysIPV6_PMTUDISC_WANT      = C.IPV6_PMTUDISC_WANT
+	//sysIPV6_PMTUDISC_DO        = C.IPV6_PMTUDISC_DO
+	//sysIPV6_PMTUDISC_PROBE     = C.IPV6_PMTUDISC_PROBE
+	//sysIPV6_PMTUDISC_INTERFACE = C.IPV6_PMTUDISC_INTERFACE
+	//sysIPV6_PMTUDISC_OMIT      = C.IPV6_PMTUDISC_OMIT
+
+	sysIPV6_FLOWLABEL_MGR = C.IPV6_FLOWLABEL_MGR
+	sysIPV6_FLOWINFO_SEND = C.IPV6_FLOWINFO_SEND
+
+	sysIPV6_IPSEC_POLICY = C.IPV6_IPSEC_POLICY
+	sysIPV6_XFRM_POLICY  = C.IPV6_XFRM_POLICY
+
+	sysIPV6_RECVPKTINFO  = C.IPV6_RECVPKTINFO
+	sysIPV6_PKTINFO      = C.IPV6_PKTINFO
+	sysIPV6_RECVHOPLIMIT = C.IPV6_RECVHOPLIMIT
+	sysIPV6_HOPLIMIT     = C.IPV6_HOPLIMIT
+	sysIPV6_RECVHOPOPTS  = C.IPV6_RECVHOPOPTS
+	sysIPV6_HOPOPTS      = C.IPV6_HOPOPTS
+	sysIPV6_RTHDRDSTOPTS = C.IPV6_RTHDRDSTOPTS
+	sysIPV6_RECVRTHDR    = C.IPV6_RECVRTHDR
+	sysIPV6_RTHDR        = C.IPV6_RTHDR
+	sysIPV6_RECVDSTOPTS  = C.IPV6_RECVDSTOPTS
+	sysIPV6_DSTOPTS      = C.IPV6_DSTOPTS
+	sysIPV6_RECVPATHMTU  = C.IPV6_RECVPATHMTU
+	sysIPV6_PATHMTU      = C.IPV6_PATHMTU
+	sysIPV6_DONTFRAG     = C.IPV6_DONTFRAG
+
+	sysIPV6_RECVTCLASS = C.IPV6_RECVTCLASS
+	sysIPV6_TCLASS     = C.IPV6_TCLASS
+
+	sysIPV6_ADDR_PREFERENCES = C.IPV6_ADDR_PREFERENCES
+
+	sysIPV6_PREFER_SRC_TMP            = C.IPV6_PREFER_SRC_TMP
+	sysIPV6_PREFER_SRC_PUBLIC         = C.IPV6_PREFER_SRC_PUBLIC
+	sysIPV6_PREFER_SRC_PUBTMP_DEFAULT = C.IPV6_PREFER_SRC_PUBTMP_DEFAULT
+	sysIPV6_PREFER_SRC_COA            = C.IPV6_PREFER_SRC_COA
+	sysIPV6_PREFER_SRC_HOME           = C.IPV6_PREFER_SRC_HOME
+	sysIPV6_PREFER_SRC_CGA            = C.IPV6_PREFER_SRC_CGA
+	sysIPV6_PREFER_SRC_NONCGA         = C.IPV6_PREFER_SRC_NONCGA
+
+	sysIPV6_MINHOPCOUNT = C.IPV6_MINHOPCOUNT
+
+	sysIPV6_ORIGDSTADDR     = C.IPV6_ORIGDSTADDR
+	sysIPV6_RECVORIGDSTADDR = C.IPV6_RECVORIGDSTADDR
+	sysIPV6_TRANSPARENT     = C.IPV6_TRANSPARENT
+	sysIPV6_UNICAST_IF      = C.IPV6_UNICAST_IF
+
+	sysICMPV6_FILTER = C.ICMPV6_FILTER
+
+	sysICMPV6_FILTER_BLOCK       = C.ICMPV6_FILTER_BLOCK
+	sysICMPV6_FILTER_PASS        = C.ICMPV6_FILTER_PASS
+	sysICMPV6_FILTER_BLOCKOTHERS = C.ICMPV6_FILTER_BLOCKOTHERS
+	sysICMPV6_FILTER_PASSONLY    = C.ICMPV6_FILTER_PASSONLY
+
+	sysSOL_SOCKET       = C.SOL_SOCKET
+	sysSO_ATTACH_FILTER = C.SO_ATTACH_FILTER
+
+	sizeofKernelSockaddrStorage = C.sizeof_struct___kernel_sockaddr_storage
+	sizeofSockaddrInet6         = C.sizeof_struct_sockaddr_in6
+	sizeofInet6Pktinfo          = C.sizeof_struct_in6_pktinfo
+	sizeofIPv6Mtuinfo           = C.sizeof_struct_ip6_mtuinfo
+	sizeofIPv6FlowlabelReq      = C.sizeof_struct_in6_flowlabel_req
+
+	sizeofIPv6Mreq       = C.sizeof_struct_ipv6_mreq
+	sizeofGroupReq       = C.sizeof_struct_group_req
+	sizeofGroupSourceReq = C.sizeof_struct_group_source_req
+
+	sizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
+
+	sizeofSockFprog = C.sizeof_struct_sock_fprog
+)
+
+type kernelSockaddrStorage C.struct___kernel_sockaddr_storage
+
+type sockaddrInet6 C.struct_sockaddr_in6
+
+type inet6Pktinfo C.struct_in6_pktinfo
+
+type ipv6Mtuinfo C.struct_ip6_mtuinfo
+
+type ipv6FlowlabelReq C.struct_in6_flowlabel_req
+
+type ipv6Mreq C.struct_ipv6_mreq
+
+type groupReq C.struct_group_req
+
+type groupSourceReq C.struct_group_source_req
+
+type icmpv6Filter C.struct_icmp6_filter
+
+type sockFProg C.struct_sock_fprog
+
+type sockFilter C.struct_sock_filter

+ 80 - 0
vendor/golang.org/x/net/ipv6/defs_netbsd.go

@@ -0,0 +1,80 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// +godefs map struct_in6_addr [16]byte /* in6_addr */
+
+package ipv6
+
+/*
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+*/
+import "C"
+
+const (
+	sysIPV6_UNICAST_HOPS   = C.IPV6_UNICAST_HOPS
+	sysIPV6_MULTICAST_IF   = C.IPV6_MULTICAST_IF
+	sysIPV6_MULTICAST_HOPS = C.IPV6_MULTICAST_HOPS
+	sysIPV6_MULTICAST_LOOP = C.IPV6_MULTICAST_LOOP
+	sysIPV6_JOIN_GROUP     = C.IPV6_JOIN_GROUP
+	sysIPV6_LEAVE_GROUP    = C.IPV6_LEAVE_GROUP
+	sysIPV6_PORTRANGE      = C.IPV6_PORTRANGE
+	sysICMP6_FILTER        = C.ICMP6_FILTER
+
+	sysIPV6_CHECKSUM = C.IPV6_CHECKSUM
+	sysIPV6_V6ONLY   = C.IPV6_V6ONLY
+
+	sysIPV6_IPSEC_POLICY = C.IPV6_IPSEC_POLICY
+
+	sysIPV6_RTHDRDSTOPTS = C.IPV6_RTHDRDSTOPTS
+
+	sysIPV6_RECVPKTINFO  = C.IPV6_RECVPKTINFO
+	sysIPV6_RECVHOPLIMIT = C.IPV6_RECVHOPLIMIT
+	sysIPV6_RECVRTHDR    = C.IPV6_RECVRTHDR
+	sysIPV6_RECVHOPOPTS  = C.IPV6_RECVHOPOPTS
+	sysIPV6_RECVDSTOPTS  = C.IPV6_RECVDSTOPTS
+
+	sysIPV6_USE_MIN_MTU = C.IPV6_USE_MIN_MTU
+	sysIPV6_RECVPATHMTU = C.IPV6_RECVPATHMTU
+	sysIPV6_PATHMTU     = C.IPV6_PATHMTU
+
+	sysIPV6_PKTINFO  = C.IPV6_PKTINFO
+	sysIPV6_HOPLIMIT = C.IPV6_HOPLIMIT
+	sysIPV6_NEXTHOP  = C.IPV6_NEXTHOP
+	sysIPV6_HOPOPTS  = C.IPV6_HOPOPTS
+	sysIPV6_DSTOPTS  = C.IPV6_DSTOPTS
+	sysIPV6_RTHDR    = C.IPV6_RTHDR
+
+	sysIPV6_RECVTCLASS = C.IPV6_RECVTCLASS
+
+	sysIPV6_TCLASS   = C.IPV6_TCLASS
+	sysIPV6_DONTFRAG = C.IPV6_DONTFRAG
+
+	sysIPV6_PORTRANGE_DEFAULT = C.IPV6_PORTRANGE_DEFAULT
+	sysIPV6_PORTRANGE_HIGH    = C.IPV6_PORTRANGE_HIGH
+	sysIPV6_PORTRANGE_LOW     = C.IPV6_PORTRANGE_LOW
+
+	sizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
+	sizeofInet6Pktinfo  = C.sizeof_struct_in6_pktinfo
+	sizeofIPv6Mtuinfo   = C.sizeof_struct_ip6_mtuinfo
+
+	sizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
+
+	sizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
+)
+
+type sockaddrInet6 C.struct_sockaddr_in6
+
+type inet6Pktinfo C.struct_in6_pktinfo
+
+type ipv6Mtuinfo C.struct_ip6_mtuinfo
+
+type ipv6Mreq C.struct_ipv6_mreq
+
+type icmpv6Filter C.struct_icmp6_filter

+ 89 - 0
vendor/golang.org/x/net/ipv6/defs_openbsd.go

@@ -0,0 +1,89 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// +godefs map struct_in6_addr [16]byte /* in6_addr */
+
+package ipv6
+
+/*
+#include <sys/param.h>
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+*/
+import "C"
+
+const (
+	sysIPV6_UNICAST_HOPS   = C.IPV6_UNICAST_HOPS
+	sysIPV6_MULTICAST_IF   = C.IPV6_MULTICAST_IF
+	sysIPV6_MULTICAST_HOPS = C.IPV6_MULTICAST_HOPS
+	sysIPV6_MULTICAST_LOOP = C.IPV6_MULTICAST_LOOP
+	sysIPV6_JOIN_GROUP     = C.IPV6_JOIN_GROUP
+	sysIPV6_LEAVE_GROUP    = C.IPV6_LEAVE_GROUP
+	sysIPV6_PORTRANGE      = C.IPV6_PORTRANGE
+	sysICMP6_FILTER        = C.ICMP6_FILTER
+
+	sysIPV6_CHECKSUM = C.IPV6_CHECKSUM
+	sysIPV6_V6ONLY   = C.IPV6_V6ONLY
+
+	sysIPV6_RTHDRDSTOPTS = C.IPV6_RTHDRDSTOPTS
+
+	sysIPV6_RECVPKTINFO  = C.IPV6_RECVPKTINFO
+	sysIPV6_RECVHOPLIMIT = C.IPV6_RECVHOPLIMIT
+	sysIPV6_RECVRTHDR    = C.IPV6_RECVRTHDR
+	sysIPV6_RECVHOPOPTS  = C.IPV6_RECVHOPOPTS
+	sysIPV6_RECVDSTOPTS  = C.IPV6_RECVDSTOPTS
+
+	sysIPV6_USE_MIN_MTU = C.IPV6_USE_MIN_MTU
+	sysIPV6_RECVPATHMTU = C.IPV6_RECVPATHMTU
+
+	sysIPV6_PATHMTU = C.IPV6_PATHMTU
+
+	sysIPV6_PKTINFO  = C.IPV6_PKTINFO
+	sysIPV6_HOPLIMIT = C.IPV6_HOPLIMIT
+	sysIPV6_NEXTHOP  = C.IPV6_NEXTHOP
+	sysIPV6_HOPOPTS  = C.IPV6_HOPOPTS
+	sysIPV6_DSTOPTS  = C.IPV6_DSTOPTS
+	sysIPV6_RTHDR    = C.IPV6_RTHDR
+
+	sysIPV6_AUTH_LEVEL        = C.IPV6_AUTH_LEVEL
+	sysIPV6_ESP_TRANS_LEVEL   = C.IPV6_ESP_TRANS_LEVEL
+	sysIPV6_ESP_NETWORK_LEVEL = C.IPV6_ESP_NETWORK_LEVEL
+	sysIPSEC6_OUTSA           = C.IPSEC6_OUTSA
+	sysIPV6_RECVTCLASS        = C.IPV6_RECVTCLASS
+
+	sysIPV6_AUTOFLOWLABEL = C.IPV6_AUTOFLOWLABEL
+	sysIPV6_IPCOMP_LEVEL  = C.IPV6_IPCOMP_LEVEL
+
+	sysIPV6_TCLASS   = C.IPV6_TCLASS
+	sysIPV6_DONTFRAG = C.IPV6_DONTFRAG
+	sysIPV6_PIPEX    = C.IPV6_PIPEX
+
+	sysIPV6_RTABLE = C.IPV6_RTABLE
+
+	sysIPV6_PORTRANGE_DEFAULT = C.IPV6_PORTRANGE_DEFAULT
+	sysIPV6_PORTRANGE_HIGH    = C.IPV6_PORTRANGE_HIGH
+	sysIPV6_PORTRANGE_LOW     = C.IPV6_PORTRANGE_LOW
+
+	sizeofSockaddrInet6 = C.sizeof_struct_sockaddr_in6
+	sizeofInet6Pktinfo  = C.sizeof_struct_in6_pktinfo
+	sizeofIPv6Mtuinfo   = C.sizeof_struct_ip6_mtuinfo
+
+	sizeofIPv6Mreq = C.sizeof_struct_ipv6_mreq
+
+	sizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
+)
+
+type sockaddrInet6 C.struct_sockaddr_in6
+
+type inet6Pktinfo C.struct_in6_pktinfo
+
+type ipv6Mtuinfo C.struct_ip6_mtuinfo
+
+type ipv6Mreq C.struct_ipv6_mreq
+
+type icmpv6Filter C.struct_icmp6_filter

+ 114 - 0
vendor/golang.org/x/net/ipv6/defs_solaris.go

@@ -0,0 +1,114 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// +godefs map struct_in6_addr [16]byte /* in6_addr */
+
+package ipv6
+
+/*
+#include <sys/socket.h>
+
+#include <netinet/in.h>
+#include <netinet/icmp6.h>
+*/
+import "C"
+
+const (
+	sysIPV6_UNICAST_HOPS   = C.IPV6_UNICAST_HOPS
+	sysIPV6_MULTICAST_IF   = C.IPV6_MULTICAST_IF
+	sysIPV6_MULTICAST_HOPS = C.IPV6_MULTICAST_HOPS
+	sysIPV6_MULTICAST_LOOP = C.IPV6_MULTICAST_LOOP
+	sysIPV6_JOIN_GROUP     = C.IPV6_JOIN_GROUP
+	sysIPV6_LEAVE_GROUP    = C.IPV6_LEAVE_GROUP
+
+	sysIPV6_PKTINFO = C.IPV6_PKTINFO
+
+	sysIPV6_HOPLIMIT = C.IPV6_HOPLIMIT
+	sysIPV6_NEXTHOP  = C.IPV6_NEXTHOP
+	sysIPV6_HOPOPTS  = C.IPV6_HOPOPTS
+	sysIPV6_DSTOPTS  = C.IPV6_DSTOPTS
+
+	sysIPV6_RTHDR        = C.IPV6_RTHDR
+	sysIPV6_RTHDRDSTOPTS = C.IPV6_RTHDRDSTOPTS
+
+	sysIPV6_RECVPKTINFO  = C.IPV6_RECVPKTINFO
+	sysIPV6_RECVHOPLIMIT = C.IPV6_RECVHOPLIMIT
+	sysIPV6_RECVHOPOPTS  = C.IPV6_RECVHOPOPTS
+
+	sysIPV6_RECVRTHDR = C.IPV6_RECVRTHDR
+
+	sysIPV6_RECVRTHDRDSTOPTS = C.IPV6_RECVRTHDRDSTOPTS
+
+	sysIPV6_CHECKSUM        = C.IPV6_CHECKSUM
+	sysIPV6_RECVTCLASS      = C.IPV6_RECVTCLASS
+	sysIPV6_USE_MIN_MTU     = C.IPV6_USE_MIN_MTU
+	sysIPV6_DONTFRAG        = C.IPV6_DONTFRAG
+	sysIPV6_SEC_OPT         = C.IPV6_SEC_OPT
+	sysIPV6_SRC_PREFERENCES = C.IPV6_SRC_PREFERENCES
+	sysIPV6_RECVPATHMTU     = C.IPV6_RECVPATHMTU
+	sysIPV6_PATHMTU         = C.IPV6_PATHMTU
+	sysIPV6_TCLASS          = C.IPV6_TCLASS
+	sysIPV6_V6ONLY          = C.IPV6_V6ONLY
+
+	sysIPV6_RECVDSTOPTS = C.IPV6_RECVDSTOPTS
+
+	sysMCAST_JOIN_GROUP         = C.MCAST_JOIN_GROUP
+	sysMCAST_LEAVE_GROUP        = C.MCAST_LEAVE_GROUP
+	sysMCAST_BLOCK_SOURCE       = C.MCAST_BLOCK_SOURCE
+	sysMCAST_UNBLOCK_SOURCE     = C.MCAST_UNBLOCK_SOURCE
+	sysMCAST_JOIN_SOURCE_GROUP  = C.MCAST_JOIN_SOURCE_GROUP
+	sysMCAST_LEAVE_SOURCE_GROUP = C.MCAST_LEAVE_SOURCE_GROUP
+
+	sysIPV6_PREFER_SRC_HOME   = C.IPV6_PREFER_SRC_HOME
+	sysIPV6_PREFER_SRC_COA    = C.IPV6_PREFER_SRC_COA
+	sysIPV6_PREFER_SRC_PUBLIC = C.IPV6_PREFER_SRC_PUBLIC
+	sysIPV6_PREFER_SRC_TMP    = C.IPV6_PREFER_SRC_TMP
+	sysIPV6_PREFER_SRC_NONCGA = C.IPV6_PREFER_SRC_NONCGA
+	sysIPV6_PREFER_SRC_CGA    = C.IPV6_PREFER_SRC_CGA
+
+	sysIPV6_PREFER_SRC_MIPMASK    = C.IPV6_PREFER_SRC_MIPMASK
+	sysIPV6_PREFER_SRC_MIPDEFAULT = C.IPV6_PREFER_SRC_MIPDEFAULT
+	sysIPV6_PREFER_SRC_TMPMASK    = C.IPV6_PREFER_SRC_TMPMASK
+	sysIPV6_PREFER_SRC_TMPDEFAULT = C.IPV6_PREFER_SRC_TMPDEFAULT
+	sysIPV6_PREFER_SRC_CGAMASK    = C.IPV6_PREFER_SRC_CGAMASK
+	sysIPV6_PREFER_SRC_CGADEFAULT = C.IPV6_PREFER_SRC_CGADEFAULT
+
+	sysIPV6_PREFER_SRC_MASK = C.IPV6_PREFER_SRC_MASK
+
+	sysIPV6_PREFER_SRC_DEFAULT = C.IPV6_PREFER_SRC_DEFAULT
+
+	sysIPV6_BOUND_IF   = C.IPV6_BOUND_IF
+	sysIPV6_UNSPEC_SRC = C.IPV6_UNSPEC_SRC
+
+	sysICMP6_FILTER = C.ICMP6_FILTER
+
+	sizeofSockaddrStorage = C.sizeof_struct_sockaddr_storage
+	sizeofSockaddrInet6   = C.sizeof_struct_sockaddr_in6
+	sizeofInet6Pktinfo    = C.sizeof_struct_in6_pktinfo
+	sizeofIPv6Mtuinfo     = C.sizeof_struct_ip6_mtuinfo
+
+	sizeofIPv6Mreq       = C.sizeof_struct_ipv6_mreq
+	sizeofGroupReq       = C.sizeof_struct_group_req
+	sizeofGroupSourceReq = C.sizeof_struct_group_source_req
+
+	sizeofICMPv6Filter = C.sizeof_struct_icmp6_filter
+)
+
+type sockaddrStorage C.struct_sockaddr_storage
+
+type sockaddrInet6 C.struct_sockaddr_in6
+
+type inet6Pktinfo C.struct_in6_pktinfo
+
+type ipv6Mtuinfo C.struct_ip6_mtuinfo
+
+type ipv6Mreq C.struct_ipv6_mreq
+
+type groupReq C.struct_group_req
+
+type groupSourceReq C.struct_group_source_req
+
+type icmpv6Filter C.struct_icmp6_filter

+ 302 - 0
vendor/golang.org/x/net/ipv6/dgramopt.go

@@ -0,0 +1,302 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"net"
+	"syscall"
+
+	"golang.org/x/net/bpf"
+)
+
+// MulticastHopLimit returns the hop limit field value for outgoing
+// multicast packets.
+func (c *dgramOpt) MulticastHopLimit() (int, error) {
+	if !c.ok() {
+		return 0, syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoMulticastHopLimit]
+	if !ok {
+		return 0, errOpNoSupport
+	}
+	return so.GetInt(c.Conn)
+}
+
+// SetMulticastHopLimit sets the hop limit field value for future
+// outgoing multicast packets.
+func (c *dgramOpt) SetMulticastHopLimit(hoplim int) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoMulticastHopLimit]
+	if !ok {
+		return errOpNoSupport
+	}
+	return so.SetInt(c.Conn, hoplim)
+}
+
+// MulticastInterface returns the default interface for multicast
+// packet transmissions.
+func (c *dgramOpt) MulticastInterface() (*net.Interface, error) {
+	if !c.ok() {
+		return nil, syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoMulticastInterface]
+	if !ok {
+		return nil, errOpNoSupport
+	}
+	return so.getMulticastInterface(c.Conn)
+}
+
+// SetMulticastInterface sets the default interface for future
+// multicast packet transmissions.
+func (c *dgramOpt) SetMulticastInterface(ifi *net.Interface) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoMulticastInterface]
+	if !ok {
+		return errOpNoSupport
+	}
+	return so.setMulticastInterface(c.Conn, ifi)
+}
+
+// MulticastLoopback reports whether transmitted multicast packets
+// should be copied and sent back to the originator.
+func (c *dgramOpt) MulticastLoopback() (bool, error) {
+	if !c.ok() {
+		return false, syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoMulticastLoopback]
+	if !ok {
+		return false, errOpNoSupport
+	}
+	on, err := so.GetInt(c.Conn)
+	if err != nil {
+		return false, err
+	}
+	return on == 1, nil
+}
+
+// SetMulticastLoopback sets whether transmitted multicast packets
+// should be copied and sent back to the originator.
+func (c *dgramOpt) SetMulticastLoopback(on bool) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoMulticastLoopback]
+	if !ok {
+		return errOpNoSupport
+	}
+	return so.SetInt(c.Conn, boolint(on))
+}
+
+// JoinGroup joins the group address group on the interface ifi.
+// By default all sources that can cast data to group are accepted.
+// It's possible to mute and unmute data transmission from a specific
+// source by using ExcludeSourceSpecificGroup and
+// IncludeSourceSpecificGroup.
+// JoinGroup uses the system assigned multicast interface when ifi is
+// nil, although this is not recommended because the assignment
+// depends on platforms and sometimes it might require routing
+// configuration.
+func (c *dgramOpt) JoinGroup(ifi *net.Interface, group net.Addr) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoJoinGroup]
+	if !ok {
+		return errOpNoSupport
+	}
+	grp := netAddrToIP16(group)
+	if grp == nil {
+		return errMissingAddress
+	}
+	return so.setGroup(c.Conn, ifi, grp)
+}
+
+// LeaveGroup leaves the group address group on the interface ifi
+// regardless of whether the group is any-source group or
+// source-specific group.
+func (c *dgramOpt) LeaveGroup(ifi *net.Interface, group net.Addr) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoLeaveGroup]
+	if !ok {
+		return errOpNoSupport
+	}
+	grp := netAddrToIP16(group)
+	if grp == nil {
+		return errMissingAddress
+	}
+	return so.setGroup(c.Conn, ifi, grp)
+}
+
+// JoinSourceSpecificGroup joins the source-specific group comprising
+// group and source on the interface ifi.
+// JoinSourceSpecificGroup uses the system assigned multicast
+// interface when ifi is nil, although this is not recommended because
+// the assignment depends on platforms and sometimes it might require
+// routing configuration.
+func (c *dgramOpt) JoinSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoJoinSourceGroup]
+	if !ok {
+		return errOpNoSupport
+	}
+	grp := netAddrToIP16(group)
+	if grp == nil {
+		return errMissingAddress
+	}
+	src := netAddrToIP16(source)
+	if src == nil {
+		return errMissingAddress
+	}
+	return so.setSourceGroup(c.Conn, ifi, grp, src)
+}
+
+// LeaveSourceSpecificGroup leaves the source-specific group on the
+// interface ifi.
+func (c *dgramOpt) LeaveSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoLeaveSourceGroup]
+	if !ok {
+		return errOpNoSupport
+	}
+	grp := netAddrToIP16(group)
+	if grp == nil {
+		return errMissingAddress
+	}
+	src := netAddrToIP16(source)
+	if src == nil {
+		return errMissingAddress
+	}
+	return so.setSourceGroup(c.Conn, ifi, grp, src)
+}
+
+// ExcludeSourceSpecificGroup excludes the source-specific group from
+// the already joined any-source groups by JoinGroup on the interface
+// ifi.
+func (c *dgramOpt) ExcludeSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoBlockSourceGroup]
+	if !ok {
+		return errOpNoSupport
+	}
+	grp := netAddrToIP16(group)
+	if grp == nil {
+		return errMissingAddress
+	}
+	src := netAddrToIP16(source)
+	if src == nil {
+		return errMissingAddress
+	}
+	return so.setSourceGroup(c.Conn, ifi, grp, src)
+}
+
+// IncludeSourceSpecificGroup includes the excluded source-specific
+// group by ExcludeSourceSpecificGroup again on the interface ifi.
+func (c *dgramOpt) IncludeSourceSpecificGroup(ifi *net.Interface, group, source net.Addr) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoUnblockSourceGroup]
+	if !ok {
+		return errOpNoSupport
+	}
+	grp := netAddrToIP16(group)
+	if grp == nil {
+		return errMissingAddress
+	}
+	src := netAddrToIP16(source)
+	if src == nil {
+		return errMissingAddress
+	}
+	return so.setSourceGroup(c.Conn, ifi, grp, src)
+}
+
+// Checksum reports whether the kernel will compute, store or verify a
+// checksum for both incoming and outgoing packets. If on is true, it
+// returns an offset in bytes into the data of where the checksum
+// field is located.
+func (c *dgramOpt) Checksum() (on bool, offset int, err error) {
+	if !c.ok() {
+		return false, 0, syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoChecksum]
+	if !ok {
+		return false, 0, errOpNoSupport
+	}
+	offset, err = so.GetInt(c.Conn)
+	if err != nil {
+		return false, 0, err
+	}
+	if offset < 0 {
+		return false, 0, nil
+	}
+	return true, offset, nil
+}
+
+// SetChecksum enables the kernel checksum processing. If on is true,
+// the offset should be an offset in bytes into the data of where the
+// checksum field is located.
+func (c *dgramOpt) SetChecksum(on bool, offset int) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoChecksum]
+	if !ok {
+		return errOpNoSupport
+	}
+	if !on {
+		offset = -1
+	}
+	return so.SetInt(c.Conn, offset)
+}
+
+// ICMPFilter returns an ICMP filter.
+func (c *dgramOpt) ICMPFilter() (*ICMPFilter, error) {
+	if !c.ok() {
+		return nil, syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoICMPFilter]
+	if !ok {
+		return nil, errOpNoSupport
+	}
+	return so.getICMPFilter(c.Conn)
+}
+
+// SetICMPFilter deploys the ICMP filter.
+func (c *dgramOpt) SetICMPFilter(f *ICMPFilter) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoICMPFilter]
+	if !ok {
+		return errOpNoSupport
+	}
+	return so.setICMPFilter(c.Conn, f)
+}
+
+// SetBPF attaches a BPF program to the connection.
+//
+// Only supported on Linux.
+func (c *dgramOpt) SetBPF(filter []bpf.RawInstruction) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoAttachFilter]
+	if !ok {
+		return errOpNoSupport
+	}
+	return so.setBPF(c.Conn, filter)
+}
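
A rough sketch of the ICMPFilter accessors above, blocking every ICMPv6 type except echo replies. It assumes the ICMPFilter type and the ICMPTypeEchoReply constant defined elsewhere in this package, and opening the raw ICMPv6 socket normally requires elevated privileges.

	package main

	import (
		"log"
		"net"

		"golang.org/x/net/ipv6"
	)

	func main() {
		// A raw ICMPv6 socket; this usually needs root or CAP_NET_RAW.
		c, err := net.ListenPacket("ip6:ipv6-icmp", "::")
		if err != nil {
			log.Fatal(err)
		}
		defer c.Close()

		p := ipv6.NewPacketConn(c)
		// Block everything, then re-allow echo replies only.
		var f ipv6.ICMPFilter
		f.SetAll(true)
		f.Accept(ipv6.ICMPTypeEchoReply)
		if err := p.SetICMPFilter(&f); err != nil {
			log.Fatal(err)
		}
	}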

+ 243 - 0
vendor/golang.org/x/net/ipv6/doc.go

@@ -0,0 +1,243 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package ipv6 implements IP-level socket options for the Internet
+// Protocol version 6.
+//
+// The package provides IP-level socket options that allow
+// manipulation of IPv6 facilities.
+//
+// The IPv6 protocol is defined in RFC 8200.
+// Socket interface extensions are defined in RFC 3493, RFC 3542 and
+// RFC 3678.
+// MLDv1 and MLDv2 are defined in RFC 2710 and RFC 3810.
+// Source-specific multicast is defined in RFC 4607.
+//
+// On Darwin, this package requires OS X Mavericks version 10.9 or
+// above, or equivalent.
+//
+//
+// Unicasting
+//
+// The options for unicasting are available for net.TCPConn,
+// net.UDPConn and net.IPConn which are created as network connections
+// that use the IPv6 transport. When a single TCP connection carrying
+// a data flow of multiple packets needs to indicate the flow is
+// important, Conn is used to set the traffic class field on the IPv6
+// header for each packet.
+//
+//	ln, err := net.Listen("tcp6", "[::]:1024")
+//	if err != nil {
+//		// error handling
+//	}
+//	defer ln.Close()
+//	for {
+//		c, err := ln.Accept()
+//		if err != nil {
+//			// error handling
+//		}
+//		go func(c net.Conn) {
+//			defer c.Close()
+//
+// The outgoing packets will be labeled DiffServ assured forwarding
+// class 1 low drop precedence, known as AF11 packets.
+//
+//			if err := ipv6.NewConn(c).SetTrafficClass(0x28); err != nil {
+//				// error handling
+//			}
+//			if _, err := c.Write(data); err != nil {
+//				// error handling
+//			}
+//		}(c)
+//	}
+//
+//
+// Multicasting
+//
+// The options for multicasting are available for net.UDPConn and
+// net.IPConn which are created as network connections that use the
+// IPv6 transport. A few network facilities must be prepared before
+// you begin multicasting, at a minimum joining network interfaces and
+// multicast groups.
+//
+//	en0, err := net.InterfaceByName("en0")
+//	if err != nil {
+//		// error handling
+//	}
+//	en1, err := net.InterfaceByIndex(911)
+//	if err != nil {
+//		// error handling
+//	}
+//	group := net.ParseIP("ff02::114")
+//
+// First, an application listens to an appropriate address with an
+// appropriate service port.
+//
+//	c, err := net.ListenPacket("udp6", "[::]:1024")
+//	if err != nil {
+//		// error handling
+//	}
+//	defer c.Close()
+//
+// Second, the application joins multicast groups and starts listening
+// to the groups on the specified network interfaces. Note that the
+// service port for transport layer protocol does not matter with this
+// operation as joining groups affects only network and link layer
+// protocols, such as IPv6 and Ethernet.
+//
+//	p := ipv6.NewPacketConn(c)
+//	if err := p.JoinGroup(en0, &net.UDPAddr{IP: group}); err != nil {
+//		// error handling
+//	}
+//	if err := p.JoinGroup(en1, &net.UDPAddr{IP: group}); err != nil {
+//		// error handling
+//	}
+//
+// The application might set per packet control message transmissions
+// between the protocol stack within the kernel. When the application
+// needs a destination address on an incoming packet,
+// SetControlMessage of PacketConn is used to enable control message
+// transmissions.
+//
+//	if err := p.SetControlMessage(ipv6.FlagDst, true); err != nil {
+//		// error handling
+//	}
+//
+// The application could identify whether the received packets are
+// of interest by using the control message that contains the
+// destination address of the received packet.
+//
+//	b := make([]byte, 1500)
+//	for {
+//		n, rcm, src, err := p.ReadFrom(b)
+//		if err != nil {
+//			// error handling
+//		}
+//		if rcm.Dst.IsMulticast() {
+//			if rcm.Dst.Equal(group) {
+//				// joined group, do something
+//			} else {
+//				// unknown group, discard
+//				continue
+//			}
+//		}
+//
+// The application can also send both unicast and multicast packets.
+//
+//		p.SetTrafficClass(0x0)
+//		p.SetHopLimit(16)
+//		if _, err := p.WriteTo(data[:n], nil, src); err != nil {
+//			// error handling
+//		}
+//		dst := &net.UDPAddr{IP: group, Port: 1024}
+//		wcm := ipv6.ControlMessage{TrafficClass: 0xe0, HopLimit: 1}
+//		for _, ifi := range []*net.Interface{en0, en1} {
+//			wcm.IfIndex = ifi.Index
+//			if _, err := p.WriteTo(data[:n], &wcm, dst); err != nil {
+//				// error handling
+//			}
+//		}
+//	}
+//
+//
+// More multicasting
+//
+// An application that uses PacketConn may join multiple multicast
+// groups. For example, a UDP listener with port 1024 might join two
+// different groups across over two different network interfaces by
+// using:
+//
+//	c, err := net.ListenPacket("udp6", "[::]:1024")
+//	if err != nil {
+//		// error handling
+//	}
+//	defer c.Close()
+//	p := ipv6.NewPacketConn(c)
+//	if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::1:114")}); err != nil {
+//		// error handling
+//	}
+//	if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::2:114")}); err != nil {
+//		// error handling
+//	}
+//	if err := p.JoinGroup(en1, &net.UDPAddr{IP: net.ParseIP("ff02::2:114")}); err != nil {
+//		// error handling
+//	}
+//
+// It is possible for multiple UDP listeners that listen on the same
+// UDP port to join the same multicast group. The net package will
+// provide a socket that listens to a wildcard address with a reusable
+// UDP port when an appropriate multicast address prefix is passed to
+// net.ListenPacket or net.ListenUDP.
+//
+//	c1, err := net.ListenPacket("udp6", "[ff02::]:1024")
+//	if err != nil {
+//		// error handling
+//	}
+//	defer c1.Close()
+//	c2, err := net.ListenPacket("udp6", "[ff02::]:1024")
+//	if err != nil {
+//		// error handling
+//	}
+//	defer c2.Close()
+//	p1 := ipv6.NewPacketConn(c1)
+//	if err := p1.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::114")}); err != nil {
+//		// error handling
+//	}
+//	p2 := ipv6.NewPacketConn(c2)
+//	if err := p2.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::114")}); err != nil {
+//		// error handling
+//	}
+//
+// Also it is possible for the application to leave or rejoin a
+// multicast group on the network interface.
+//
+//	if err := p.LeaveGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff02::114")}); err != nil {
+//		// error handling
+//	}
+//	if err := p.JoinGroup(en0, &net.UDPAddr{IP: net.ParseIP("ff01::114")}); err != nil {
+//		// error handling
+//	}
+//
+//
+// Source-specific multicasting
+//
+// An application that uses PacketConn on an MLDv2-supported platform
+// is able to join source-specific multicast groups.
+// The application may use JoinSourceSpecificGroup and
+// LeaveSourceSpecificGroup for the operation known as "include" mode,
+//
+//	ssmgroup := net.UDPAddr{IP: net.ParseIP("ff32::8000:9")}
+//	ssmsource := net.UDPAddr{IP: net.ParseIP("fe80::cafe")}
+//	if err := p.JoinSourceSpecificGroup(en0, &ssmgroup, &ssmsource); err != nil {
+//		// error handling
+//	}
+//	if err := p.LeaveSourceSpecificGroup(en0, &ssmgroup, &ssmsource); err != nil {
+//		// error handling
+//	}
+//
+// or JoinGroup, ExcludeSourceSpecificGroup,
+// IncludeSourceSpecificGroup and LeaveGroup for the operation known
+// as "exclude" mode.
+//
+//	exclsource := net.UDPAddr{IP: net.ParseIP("fe80::dead")}
+//	if err := p.JoinGroup(en0, &ssmgroup); err != nil {
+//		// error handling
+//	}
+//	if err := p.ExcludeSourceSpecificGroup(en0, &ssmgroup, &exclsource); err != nil {
+//		// error handling
+//	}
+//	if err := p.LeaveGroup(en0, &ssmgroup); err != nil {
+//		// error handling
+//	}
+//
+// Note that what happens when an application running on a platform
+// without MLDv2 support uses JoinSourceSpecificGroup and
+// LeaveSourceSpecificGroup depends on the platform implementation.
+// In general the platform tries to fall back to conversations using
+// MLDv1 and starts to listen to multicast traffic.
+// In the fallback case, ExcludeSourceSpecificGroup and
+// IncludeSourceSpecificGroup may return an error.
+package ipv6 // import "golang.org/x/net/ipv6"
+
+// BUG(mikio): This package is not implemented on NaCl and Plan 9.

+ 128 - 0
vendor/golang.org/x/net/ipv6/endpoint.go

@@ -0,0 +1,128 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"net"
+	"syscall"
+	"time"
+
+	"golang.org/x/net/internal/socket"
+)
+
+// BUG(mikio): On Windows, the JoinSourceSpecificGroup,
+// LeaveSourceSpecificGroup, ExcludeSourceSpecificGroup and
+// IncludeSourceSpecificGroup methods of PacketConn are not
+// implemented.
+
+// A Conn represents a network endpoint that uses IPv6 transport.
+// It allows to set basic IP-level socket options such as traffic
+// class and hop limit.
+type Conn struct {
+	genericOpt
+}
+
+type genericOpt struct {
+	*socket.Conn
+}
+
+func (c *genericOpt) ok() bool { return c != nil && c.Conn != nil }
+
+// PathMTU returns a path MTU value for the destination associated
+// with the endpoint.
+func (c *Conn) PathMTU() (int, error) {
+	if !c.ok() {
+		return 0, syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoPathMTU]
+	if !ok {
+		return 0, errOpNoSupport
+	}
+	_, mtu, err := so.getMTUInfo(c.Conn)
+	if err != nil {
+		return 0, err
+	}
+	return mtu, nil
+}
+
+// NewConn returns a new Conn.
+func NewConn(c net.Conn) *Conn {
+	cc, _ := socket.NewConn(c)
+	return &Conn{
+		genericOpt: genericOpt{Conn: cc},
+	}
+}
+
+// A PacketConn represents a packet network endpoint that uses IPv6
+// transport. It is used to control several IP-level socket options
+// including IPv6 header manipulation. It also provides datagram
+// based network I/O methods specific to the IPv6 and higher layer
+// protocols such as OSPF, GRE, and UDP.
+type PacketConn struct {
+	genericOpt
+	dgramOpt
+	payloadHandler
+}
+
+type dgramOpt struct {
+	*socket.Conn
+}
+
+func (c *dgramOpt) ok() bool { return c != nil && c.Conn != nil }
+
+// SetControlMessage allows receiving the per packet basis IP-level
+// socket options.
+func (c *PacketConn) SetControlMessage(cf ControlFlags, on bool) error {
+	if !c.payloadHandler.ok() {
+		return syscall.EINVAL
+	}
+	return setControlMessage(c.dgramOpt.Conn, &c.payloadHandler.rawOpt, cf, on)
+}
+
+// SetDeadline sets the read and write deadlines associated with the
+// endpoint.
+func (c *PacketConn) SetDeadline(t time.Time) error {
+	if !c.payloadHandler.ok() {
+		return syscall.EINVAL
+	}
+	return c.payloadHandler.SetDeadline(t)
+}
+
+// SetReadDeadline sets the read deadline associated with the
+// endpoint.
+func (c *PacketConn) SetReadDeadline(t time.Time) error {
+	if !c.payloadHandler.ok() {
+		return syscall.EINVAL
+	}
+	return c.payloadHandler.SetReadDeadline(t)
+}
+
+// SetWriteDeadline sets the write deadline associated with the
+// endpoint.
+func (c *PacketConn) SetWriteDeadline(t time.Time) error {
+	if !c.payloadHandler.ok() {
+		return syscall.EINVAL
+	}
+	return c.payloadHandler.SetWriteDeadline(t)
+}
+
+// Close closes the endpoint.
+func (c *PacketConn) Close() error {
+	if !c.payloadHandler.ok() {
+		return syscall.EINVAL
+	}
+	return c.payloadHandler.Close()
+}
+
+// NewPacketConn returns a new PacketConn using c as its underlying
+// transport.
+func NewPacketConn(c net.PacketConn) *PacketConn {
+	cc, _ := socket.NewConn(c.(net.Conn))
+	return &PacketConn{
+		genericOpt:     genericOpt{Conn: cc},
+		dgramOpt:       dgramOpt{Conn: cc},
+		payloadHandler: payloadHandler{PacketConn: c, Conn: cc},
+	}
+}

+ 199 - 0
vendor/golang.org/x/net/ipv6/gen.go

@@ -0,0 +1,199 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+//go:generate go run gen.go
+
+// This program generates system adaptation constants and types,
+// internet protocol constants and tables by reading template files
+// and IANA protocol registries.
+package main
+
+import (
+	"bytes"
+	"encoding/xml"
+	"fmt"
+	"go/format"
+	"io"
+	"io/ioutil"
+	"net/http"
+	"os"
+	"os/exec"
+	"runtime"
+	"strconv"
+	"strings"
+)
+
+func main() {
+	if err := genzsys(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+	if err := geniana(); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
+
+func genzsys() error {
+	defs := "defs_" + runtime.GOOS + ".go"
+	f, err := os.Open(defs)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+	f.Close()
+	cmd := exec.Command("go", "tool", "cgo", "-godefs", defs)
+	b, err := cmd.Output()
+	if err != nil {
+		return err
+	}
+	b, err = format.Source(b)
+	if err != nil {
+		return err
+	}
+	zsys := "zsys_" + runtime.GOOS + ".go"
+	switch runtime.GOOS {
+	case "freebsd", "linux":
+		zsys = "zsys_" + runtime.GOOS + "_" + runtime.GOARCH + ".go"
+	}
+	if err := ioutil.WriteFile(zsys, b, 0644); err != nil {
+		return err
+	}
+	return nil
+}
+
+var registries = []struct {
+	url   string
+	parse func(io.Writer, io.Reader) error
+}{
+	{
+		"https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml",
+		parseICMPv6Parameters,
+	},
+}
+
+func geniana() error {
+	var bb bytes.Buffer
+	fmt.Fprintf(&bb, "// go generate gen.go\n")
+	fmt.Fprintf(&bb, "// Code generated by the command above; DO NOT EDIT.\n\n")
+	fmt.Fprintf(&bb, "package ipv6\n\n")
+	for _, r := range registries {
+		resp, err := http.Get(r.url)
+		if err != nil {
+			return err
+		}
+		defer resp.Body.Close()
+		if resp.StatusCode != http.StatusOK {
+			return fmt.Errorf("got HTTP status code %v for %v\n", resp.StatusCode, r.url)
+		}
+		if err := r.parse(&bb, resp.Body); err != nil {
+			return err
+		}
+		fmt.Fprintf(&bb, "\n")
+	}
+	b, err := format.Source(bb.Bytes())
+	if err != nil {
+		return err
+	}
+	if err := ioutil.WriteFile("iana.go", b, 0644); err != nil {
+		return err
+	}
+	return nil
+}
+
+func parseICMPv6Parameters(w io.Writer, r io.Reader) error {
+	dec := xml.NewDecoder(r)
+	var icp icmpv6Parameters
+	if err := dec.Decode(&icp); err != nil {
+		return err
+	}
+	prs := icp.escape()
+	fmt.Fprintf(w, "// %s, Updated: %s\n", icp.Title, icp.Updated)
+	fmt.Fprintf(w, "const (\n")
+	for _, pr := range prs {
+		if pr.Name == "" {
+			continue
+		}
+		fmt.Fprintf(w, "ICMPType%s ICMPType = %d", pr.Name, pr.Value)
+		fmt.Fprintf(w, "// %s\n", pr.OrigName)
+	}
+	fmt.Fprintf(w, ")\n\n")
+	fmt.Fprintf(w, "// %s, Updated: %s\n", icp.Title, icp.Updated)
+	fmt.Fprintf(w, "var icmpTypes = map[ICMPType]string{\n")
+	for _, pr := range prs {
+		if pr.Name == "" {
+			continue
+		}
+		fmt.Fprintf(w, "%d: %q,\n", pr.Value, strings.ToLower(pr.OrigName))
+	}
+	fmt.Fprintf(w, "}\n")
+	return nil
+}
+
+type icmpv6Parameters struct {
+	XMLName    xml.Name `xml:"registry"`
+	Title      string   `xml:"title"`
+	Updated    string   `xml:"updated"`
+	Registries []struct {
+		Title   string `xml:"title"`
+		Records []struct {
+			Value string `xml:"value"`
+			Name  string `xml:"name"`
+		} `xml:"record"`
+	} `xml:"registry"`
+}
+
+type canonICMPv6ParamRecord struct {
+	OrigName string
+	Name     string
+	Value    int
+}
+
+func (icp *icmpv6Parameters) escape() []canonICMPv6ParamRecord {
+	id := -1
+	for i, r := range icp.Registries {
+		if strings.Contains(r.Title, "Type") || strings.Contains(r.Title, "type") {
+			id = i
+			break
+		}
+	}
+	if id < 0 {
+		return nil
+	}
+	prs := make([]canonICMPv6ParamRecord, len(icp.Registries[id].Records))
+	sr := strings.NewReplacer(
+		"Messages", "",
+		"Message", "",
+		"ICMP", "",
+		"+", "P",
+		"-", "",
+		"/", "",
+		".", "",
+		" ", "",
+	)
+	for i, pr := range icp.Registries[id].Records {
+		if strings.Contains(pr.Name, "Reserved") ||
+			strings.Contains(pr.Name, "Unassigned") ||
+			strings.Contains(pr.Name, "Deprecated") ||
+			strings.Contains(pr.Name, "Experiment") ||
+			strings.Contains(pr.Name, "experiment") {
+			continue
+		}
+		ss := strings.Split(pr.Name, "\n")
+		if len(ss) > 1 {
+			prs[i].Name = strings.Join(ss, " ")
+		} else {
+			prs[i].Name = ss[0]
+		}
+		s := strings.TrimSpace(prs[i].Name)
+		prs[i].OrigName = s
+		prs[i].Name = sr.Replace(s)
+		prs[i].Value, _ = strconv.Atoi(pr.Value)
+	}
+	return prs
+}

+ 58 - 0
vendor/golang.org/x/net/ipv6/genericopt.go

@@ -0,0 +1,58 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import "syscall"
+
+// TrafficClass returns the traffic class field value for outgoing
+// packets.
+func (c *genericOpt) TrafficClass() (int, error) {
+	if !c.ok() {
+		return 0, syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoTrafficClass]
+	if !ok {
+		return 0, errOpNoSupport
+	}
+	return so.GetInt(c.Conn)
+}
+
+// SetTrafficClass sets the traffic class field value for future
+// outgoing packets.
+func (c *genericOpt) SetTrafficClass(tclass int) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoTrafficClass]
+	if !ok {
+		return errOpNoSupport
+	}
+	return so.SetInt(c.Conn, tclass)
+}
+
+// HopLimit returns the hop limit field value for outgoing packets.
+func (c *genericOpt) HopLimit() (int, error) {
+	if !c.ok() {
+		return 0, syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoHopLimit]
+	if !ok {
+		return 0, errOpNoSupport
+	}
+	return so.GetInt(c.Conn)
+}
+
+// SetHopLimit sets the hop limit field value for future outgoing
+// packets.
+func (c *genericOpt) SetHopLimit(hoplim int) error {
+	if !c.ok() {
+		return syscall.EINVAL
+	}
+	so, ok := sockOpts[ssoHopLimit]
+	if !ok {
+		return errOpNoSupport
+	}
+	return so.SetInt(c.Conn, hoplim)
+}
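These getters and setters back the exported traffic-class and hop-limit options on ipv6.Conn. A minimal sketch of reaching them through the package's public API follows; the dial target is a placeholder and the DSCP value is only an example.

package main

import (
	"log"
	"net"

	"golang.org/x/net/ipv6"
)

func main() {
	c, err := net.Dial("tcp6", "[2001:db8::1]:80") // placeholder address
	if err != nil {
		log.Fatal(err)
	}
	defer c.Close()

	p := ipv6.NewConn(c)
	if err := p.SetTrafficClass(0x28); err != nil { // e.g. DSCP AF11 in the upper 6 bits
		log.Println(err)
	}
	if err := p.SetHopLimit(64); err != nil {
		log.Println(err)
	}
	tc, _ := p.TrafficClass()
	hl, _ := p.HopLimit()
	log.Printf("tclass=%#x hoplim=%d", tc, hl)
}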

+ 55 - 0
vendor/golang.org/x/net/ipv6/header.go

@@ -0,0 +1,55 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"encoding/binary"
+	"fmt"
+	"net"
+)
+
+const (
+	Version   = 6  // protocol version
+	HeaderLen = 40 // header length
+)
+
+// A Header represents an IPv6 base header.
+type Header struct {
+	Version      int    // protocol version
+	TrafficClass int    // traffic class
+	FlowLabel    int    // flow label
+	PayloadLen   int    // payload length
+	NextHeader   int    // next header
+	HopLimit     int    // hop limit
+	Src          net.IP // source address
+	Dst          net.IP // destination address
+}
+
+func (h *Header) String() string {
+	if h == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("ver=%d tclass=%#x flowlbl=%#x payloadlen=%d nxthdr=%d hoplim=%d src=%v dst=%v", h.Version, h.TrafficClass, h.FlowLabel, h.PayloadLen, h.NextHeader, h.HopLimit, h.Src, h.Dst)
+}
+
+// ParseHeader parses b as an IPv6 base header.
+func ParseHeader(b []byte) (*Header, error) {
+	if len(b) < HeaderLen {
+		return nil, errHeaderTooShort
+	}
+	h := &Header{
+		Version:      int(b[0]) >> 4,
+		TrafficClass: int(b[0]&0x0f)<<4 | int(b[1])>>4,
+		FlowLabel:    int(b[1]&0x0f)<<16 | int(b[2])<<8 | int(b[3]),
+		PayloadLen:   int(binary.BigEndian.Uint16(b[4:6])),
+		NextHeader:   int(b[6]),
+		HopLimit:     int(b[7]),
+	}
+	h.Src = make(net.IP, net.IPv6len)
+	copy(h.Src, b[8:24])
+	h.Dst = make(net.IP, net.IPv6len)
+	copy(h.Dst, b[24:40])
+	return h, nil
+}
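A minimal sketch of exercising ParseHeader with a hand-built 40-byte base header (synthetic bytes, not captured traffic):

package main

import (
	"fmt"
	"log"
	"net"

	"golang.org/x/net/ipv6"
)

func main() {
	b := make([]byte, ipv6.HeaderLen)
	b[0] = 6 << 4     // version 6, traffic class 0
	b[4], b[5] = 0, 8 // payload length 8
	b[6] = 58         // next header: ICMPv6
	b[7] = 64         // hop limit
	copy(b[8:24], net.ParseIP("2001:db8::1"))  // source
	copy(b[24:40], net.ParseIP("2001:db8::2")) // destination
	h, err := ipv6.ParseHeader(b)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(h) // prints the parsed fields via Header.String
}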

+ 57 - 0
vendor/golang.org/x/net/ipv6/helper.go

@@ -0,0 +1,57 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"errors"
+	"net"
+)
+
+var (
+	errMissingAddress  = errors.New("missing address")
+	errHeaderTooShort  = errors.New("header too short")
+	errInvalidConnType = errors.New("invalid conn type")
+	errOpNoSupport     = errors.New("operation not supported")
+	errNoSuchInterface = errors.New("no such interface")
+)
+
+func boolint(b bool) int {
+	if b {
+		return 1
+	}
+	return 0
+}
+
+func netAddrToIP16(a net.Addr) net.IP {
+	switch v := a.(type) {
+	case *net.UDPAddr:
+		if ip := v.IP.To16(); ip != nil && ip.To4() == nil {
+			return ip
+		}
+	case *net.IPAddr:
+		if ip := v.IP.To16(); ip != nil && ip.To4() == nil {
+			return ip
+		}
+	}
+	return nil
+}
+
+func opAddr(a net.Addr) net.Addr {
+	switch a.(type) {
+	case *net.TCPAddr:
+		if a == nil {
+			return nil
+		}
+	case *net.UDPAddr:
+		if a == nil {
+			return nil
+		}
+	case *net.IPAddr:
+		if a == nil {
+			return nil
+		}
+	}
+	return a
+}

+ 86 - 0
vendor/golang.org/x/net/ipv6/iana.go

@@ -0,0 +1,86 @@
+// go generate gen.go
+// Code generated by the command above; DO NOT EDIT.
+
+package ipv6
+
+// Internet Control Message Protocol version 6 (ICMPv6) Parameters, Updated: 2018-03-09
+const (
+	ICMPTypeDestinationUnreachable                ICMPType = 1   // Destination Unreachable
+	ICMPTypePacketTooBig                          ICMPType = 2   // Packet Too Big
+	ICMPTypeTimeExceeded                          ICMPType = 3   // Time Exceeded
+	ICMPTypeParameterProblem                      ICMPType = 4   // Parameter Problem
+	ICMPTypeEchoRequest                           ICMPType = 128 // Echo Request
+	ICMPTypeEchoReply                             ICMPType = 129 // Echo Reply
+	ICMPTypeMulticastListenerQuery                ICMPType = 130 // Multicast Listener Query
+	ICMPTypeMulticastListenerReport               ICMPType = 131 // Multicast Listener Report
+	ICMPTypeMulticastListenerDone                 ICMPType = 132 // Multicast Listener Done
+	ICMPTypeRouterSolicitation                    ICMPType = 133 // Router Solicitation
+	ICMPTypeRouterAdvertisement                   ICMPType = 134 // Router Advertisement
+	ICMPTypeNeighborSolicitation                  ICMPType = 135 // Neighbor Solicitation
+	ICMPTypeNeighborAdvertisement                 ICMPType = 136 // Neighbor Advertisement
+	ICMPTypeRedirect                              ICMPType = 137 // Redirect Message
+	ICMPTypeRouterRenumbering                     ICMPType = 138 // Router Renumbering
+	ICMPTypeNodeInformationQuery                  ICMPType = 139 // ICMP Node Information Query
+	ICMPTypeNodeInformationResponse               ICMPType = 140 // ICMP Node Information Response
+	ICMPTypeInverseNeighborDiscoverySolicitation  ICMPType = 141 // Inverse Neighbor Discovery Solicitation Message
+	ICMPTypeInverseNeighborDiscoveryAdvertisement ICMPType = 142 // Inverse Neighbor Discovery Advertisement Message
+	ICMPTypeVersion2MulticastListenerReport       ICMPType = 143 // Version 2 Multicast Listener Report
+	ICMPTypeHomeAgentAddressDiscoveryRequest      ICMPType = 144 // Home Agent Address Discovery Request Message
+	ICMPTypeHomeAgentAddressDiscoveryReply        ICMPType = 145 // Home Agent Address Discovery Reply Message
+	ICMPTypeMobilePrefixSolicitation              ICMPType = 146 // Mobile Prefix Solicitation
+	ICMPTypeMobilePrefixAdvertisement             ICMPType = 147 // Mobile Prefix Advertisement
+	ICMPTypeCertificationPathSolicitation         ICMPType = 148 // Certification Path Solicitation Message
+	ICMPTypeCertificationPathAdvertisement        ICMPType = 149 // Certification Path Advertisement Message
+	ICMPTypeMulticastRouterAdvertisement          ICMPType = 151 // Multicast Router Advertisement
+	ICMPTypeMulticastRouterSolicitation           ICMPType = 152 // Multicast Router Solicitation
+	ICMPTypeMulticastRouterTermination            ICMPType = 153 // Multicast Router Termination
+	ICMPTypeFMIPv6                                ICMPType = 154 // FMIPv6 Messages
+	ICMPTypeRPLControl                            ICMPType = 155 // RPL Control Message
+	ICMPTypeILNPv6LocatorUpdate                   ICMPType = 156 // ILNPv6 Locator Update Message
+	ICMPTypeDuplicateAddressRequest               ICMPType = 157 // Duplicate Address Request
+	ICMPTypeDuplicateAddressConfirmation          ICMPType = 158 // Duplicate Address Confirmation
+	ICMPTypeMPLControl                            ICMPType = 159 // MPL Control Message
+	ICMPTypeExtendedEchoRequest                   ICMPType = 160 // Extended Echo Request
+	ICMPTypeExtendedEchoReply                     ICMPType = 161 // Extended Echo Reply
+)
+
+// Internet Control Message Protocol version 6 (ICMPv6) Parameters, Updated: 2018-03-09
+var icmpTypes = map[ICMPType]string{
+	1:   "destination unreachable",
+	2:   "packet too big",
+	3:   "time exceeded",
+	4:   "parameter problem",
+	128: "echo request",
+	129: "echo reply",
+	130: "multicast listener query",
+	131: "multicast listener report",
+	132: "multicast listener done",
+	133: "router solicitation",
+	134: "router advertisement",
+	135: "neighbor solicitation",
+	136: "neighbor advertisement",
+	137: "redirect message",
+	138: "router renumbering",
+	139: "icmp node information query",
+	140: "icmp node information response",
+	141: "inverse neighbor discovery solicitation message",
+	142: "inverse neighbor discovery advertisement message",
+	143: "version 2 multicast listener report",
+	144: "home agent address discovery request message",
+	145: "home agent address discovery reply message",
+	146: "mobile prefix solicitation",
+	147: "mobile prefix advertisement",
+	148: "certification path solicitation message",
+	149: "certification path advertisement message",
+	151: "multicast router advertisement",
+	152: "multicast router solicitation",
+	153: "multicast router termination",
+	154: "fmipv6 messages",
+	155: "rpl control message",
+	156: "ilnpv6 locator update message",
+	157: "duplicate address request",
+	158: "duplicate address confirmation",
+	159: "mpl control message",
+	160: "extended echo request",
+	161: "extended echo reply",
+}

+ 60 - 0
vendor/golang.org/x/net/ipv6/icmp.go

@@ -0,0 +1,60 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import "golang.org/x/net/internal/iana"
+
+// BUG(mikio): On Windows, methods related to ICMPFilter are not
+// implemented.
+
+// An ICMPType represents a type of ICMP message.
+type ICMPType int
+
+func (typ ICMPType) String() string {
+	s, ok := icmpTypes[typ]
+	if !ok {
+		return "<nil>"
+	}
+	return s
+}
+
+// Protocol returns the ICMPv6 protocol number.
+func (typ ICMPType) Protocol() int {
+	return iana.ProtocolIPv6ICMP
+}
+
+// An ICMPFilter represents an ICMP message filter for incoming
+// packets. The filter belongs to a packet delivery path on a host and
+// it cannot interact with forwarding packets or tunnel-outer packets.
+//
+// Note: RFC 8200 defines a reasonable role model. A node means a
+// device that implements IP. A router means a node that forwards IP
+// packets not explicitly addressed to itself, and a host means a node
+// that is not a router.
+type ICMPFilter struct {
+	icmpv6Filter
+}
+
+// Accept accepts incoming ICMP packets including the type field value
+// typ.
+func (f *ICMPFilter) Accept(typ ICMPType) {
+	f.accept(typ)
+}
+
+// Block blocks incoming ICMP packets including the type field value
+// typ.
+func (f *ICMPFilter) Block(typ ICMPType) {
+	f.block(typ)
+}
+
+// SetAll sets the filter action, block or accept, for all ICMP message types.
+func (f *ICMPFilter) SetAll(block bool) {
+	f.setAll(block)
+}
+
+// WillBlock reports whether the ICMP type will be blocked.
+func (f *ICMPFilter) WillBlock(typ ICMPType) bool {
+	return f.willBlock(typ)
+}
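A minimal sketch of the usual filter pattern on a raw ICMPv6 PacketConn: block everything, then accept a single type. Listening on ip6:ipv6-icmp typically requires elevated privileges, and the wildcard address is a placeholder.

package main

import (
	"log"
	"net"

	"golang.org/x/net/ipv6"
)

func main() {
	c, err := net.ListenPacket("ip6:ipv6-icmp", "::") // usually needs root/CAP_NET_RAW
	if err != nil {
		log.Fatal(err)
	}
	defer c.Close()
	p := ipv6.NewPacketConn(c)

	var f ipv6.ICMPFilter
	f.SetAll(true)                   // block every ICMPv6 type...
	f.Accept(ipv6.ICMPTypeEchoReply) // ...then accept echo replies only
	if err := p.SetICMPFilter(&f); err != nil {
		log.Fatal(err)
	}
	log.Println("blocks echo request:", f.WillBlock(ipv6.ICMPTypeEchoRequest))
}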

+ 29 - 0
vendor/golang.org/x/net/ipv6/icmp_bsd.go

@@ -0,0 +1,29 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd netbsd openbsd
+
+package ipv6
+
+func (f *icmpv6Filter) accept(typ ICMPType) {
+	f.Filt[typ>>5] |= 1 << (uint32(typ) & 31)
+}
+
+func (f *icmpv6Filter) block(typ ICMPType) {
+	f.Filt[typ>>5] &^= 1 << (uint32(typ) & 31)
+}
+
+func (f *icmpv6Filter) setAll(block bool) {
+	for i := range f.Filt {
+		if block {
+			f.Filt[i] = 0
+		} else {
+			f.Filt[i] = 1<<32 - 1
+		}
+	}
+}
+
+func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
+	return f.Filt[typ>>5]&(1<<(uint32(typ)&31)) == 0
+}

+ 27 - 0
vendor/golang.org/x/net/ipv6/icmp_linux.go

@@ -0,0 +1,27 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+func (f *icmpv6Filter) accept(typ ICMPType) {
+	f.Data[typ>>5] &^= 1 << (uint32(typ) & 31)
+}
+
+func (f *icmpv6Filter) block(typ ICMPType) {
+	f.Data[typ>>5] |= 1 << (uint32(typ) & 31)
+}
+
+func (f *icmpv6Filter) setAll(block bool) {
+	for i := range f.Data {
+		if block {
+			f.Data[i] = 1<<32 - 1
+		} else {
+			f.Data[i] = 0
+		}
+	}
+}
+
+func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
+	return f.Data[typ>>5]&(1<<(uint32(typ)&31)) != 0
+}
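Note the inverted sense relative to the BSD file above: the BSD ICMP6_FILTER is a pass bitmap (accept sets a bit), while the Linux filter data is a block bitmap (accept clears a bit), so willBlock tests for a set bit here and for a cleared bit there. Both index the 256-bit map the same way; a tiny standalone sketch of that index arithmetic:

package main

import "fmt"

func main() {
	typ := 129 // ICMPv6 echo reply
	word, bit := typ>>5, uint(typ)&31
	fmt.Printf("type %d -> word %d, bit %d (mask %#x)\n", typ, word, bit, uint32(1)<<bit)
	// Prints: type 129 -> word 4, bit 1 (mask 0x2)
}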

+ 27 - 0
vendor/golang.org/x/net/ipv6/icmp_solaris.go

@@ -0,0 +1,27 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+func (f *icmpv6Filter) accept(typ ICMPType) {
+	f.X__icmp6_filt[typ>>5] |= 1 << (uint32(typ) & 31)
+}
+
+func (f *icmpv6Filter) block(typ ICMPType) {
+	f.X__icmp6_filt[typ>>5] &^= 1 << (uint32(typ) & 31)
+}
+
+func (f *icmpv6Filter) setAll(block bool) {
+	for i := range f.X__icmp6_filt {
+		if block {
+			f.X__icmp6_filt[i] = 0
+		} else {
+			f.X__icmp6_filt[i] = 1<<32 - 1
+		}
+	}
+}
+
+func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
+	return f.X__icmp6_filt[typ>>5]&(1<<(uint32(typ)&31)) == 0
+}

+ 23 - 0
vendor/golang.org/x/net/ipv6/icmp_stub.go

@@ -0,0 +1,23 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows
+
+package ipv6
+
+type icmpv6Filter struct {
+}
+
+func (f *icmpv6Filter) accept(typ ICMPType) {
+}
+
+func (f *icmpv6Filter) block(typ ICMPType) {
+}
+
+func (f *icmpv6Filter) setAll(block bool) {
+}
+
+func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
+	return false
+}

+ 22 - 0
vendor/golang.org/x/net/ipv6/icmp_windows.go

@@ -0,0 +1,22 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+func (f *icmpv6Filter) accept(typ ICMPType) {
+	// TODO(mikio): implement this
+}
+
+func (f *icmpv6Filter) block(typ ICMPType) {
+	// TODO(mikio): implement this
+}
+
+func (f *icmpv6Filter) setAll(block bool) {
+	// TODO(mikio): implement this
+}
+
+func (f *icmpv6Filter) willBlock(typ ICMPType) bool {
+	// TODO(mikio): implement this
+	return false
+}

+ 23 - 0
vendor/golang.org/x/net/ipv6/payload.go

@@ -0,0 +1,23 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"net"
+
+	"golang.org/x/net/internal/socket"
+)
+
+// BUG(mikio): On Windows, the ControlMessage for ReadFrom and WriteTo
+// methods of PacketConn is not implemented.
+
+// A payloadHandler represents the IPv6 datagram payload handler.
+type payloadHandler struct {
+	net.PacketConn
+	*socket.Conn
+	rawOpt
+}
+
+func (c *payloadHandler) ok() bool { return c != nil && c.PacketConn != nil && c.Conn != nil }

+ 35 - 0
vendor/golang.org/x/net/ipv6/payload_cmsg.go

@@ -0,0 +1,35 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !nacl,!plan9,!windows
+
+package ipv6
+
+import (
+	"net"
+	"syscall"
+)
+
+// ReadFrom reads a payload of the received IPv6 datagram, from the
+// endpoint c, copying the payload into b. It returns the number of
+// bytes copied into b, the control message cm and the source address
+// src of the received datagram.
+func (c *payloadHandler) ReadFrom(b []byte) (n int, cm *ControlMessage, src net.Addr, err error) {
+	if !c.ok() {
+		return 0, nil, nil, syscall.EINVAL
+	}
+	return c.readFrom(b)
+}
+
+// WriteTo writes a payload of the IPv6 datagram, to the destination
+// address dst through the endpoint c, copying the payload from b. It
+// returns the number of bytes written. The control message cm allows
+// the IPv6 header fields and the datagram path to be specified. The
+// cm may be nil if control of the outgoing datagram is not required.
+func (c *payloadHandler) WriteTo(b []byte, cm *ControlMessage, dst net.Addr) (n int, err error) {
+	if !c.ok() {
+		return 0, syscall.EINVAL
+	}
+	return c.writeTo(b, cm, dst)
+}

+ 55 - 0
vendor/golang.org/x/net/ipv6/payload_cmsg_go1_8.go

@@ -0,0 +1,55 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !go1.9
+// +build !nacl,!plan9,!windows
+
+package ipv6
+
+import "net"
+
+func (c *payloadHandler) readFrom(b []byte) (n int, cm *ControlMessage, src net.Addr, err error) {
+	c.rawOpt.RLock()
+	oob := NewControlMessage(c.rawOpt.cflags)
+	c.rawOpt.RUnlock()
+	var nn int
+	switch c := c.PacketConn.(type) {
+	case *net.UDPConn:
+		if n, nn, _, src, err = c.ReadMsgUDP(b, oob); err != nil {
+			return 0, nil, nil, err
+		}
+	case *net.IPConn:
+		if n, nn, _, src, err = c.ReadMsgIP(b, oob); err != nil {
+			return 0, nil, nil, err
+		}
+	default:
+		return 0, nil, nil, &net.OpError{Op: "read", Net: c.LocalAddr().Network(), Source: c.LocalAddr(), Err: errInvalidConnType}
+	}
+	if nn > 0 {
+		cm = new(ControlMessage)
+		if err = cm.Parse(oob[:nn]); err != nil {
+			return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
+		}
+	}
+	if cm != nil {
+		cm.Src = netAddrToIP16(src)
+	}
+	return
+}
+
+func (c *payloadHandler) writeTo(b []byte, cm *ControlMessage, dst net.Addr) (n int, err error) {
+	oob := cm.Marshal()
+	if dst == nil {
+		return 0, &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: errMissingAddress}
+	}
+	switch c := c.PacketConn.(type) {
+	case *net.UDPConn:
+		n, _, err = c.WriteMsgUDP(b, oob, dst.(*net.UDPAddr))
+	case *net.IPConn:
+		n, _, err = c.WriteMsgIP(b, oob, dst.(*net.IPAddr))
+	default:
+		return 0, &net.OpError{Op: "write", Net: c.LocalAddr().Network(), Source: c.LocalAddr(), Addr: opAddr(dst), Err: errInvalidConnType}
+	}
+	return
+}

+ 57 - 0
vendor/golang.org/x/net/ipv6/payload_cmsg_go1_9.go

@@ -0,0 +1,57 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.9
+// +build !nacl,!plan9,!windows
+
+package ipv6
+
+import (
+	"net"
+
+	"golang.org/x/net/internal/socket"
+)
+
+func (c *payloadHandler) readFrom(b []byte) (int, *ControlMessage, net.Addr, error) {
+	c.rawOpt.RLock()
+	m := socket.Message{
+		Buffers: [][]byte{b},
+		OOB:     NewControlMessage(c.rawOpt.cflags),
+	}
+	c.rawOpt.RUnlock()
+	switch c.PacketConn.(type) {
+	case *net.UDPConn:
+		if err := c.RecvMsg(&m, 0); err != nil {
+			return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
+		}
+	case *net.IPConn:
+		if err := c.RecvMsg(&m, 0); err != nil {
+			return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
+		}
+	default:
+		return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: errInvalidConnType}
+	}
+	var cm *ControlMessage
+	if m.NN > 0 {
+		cm = new(ControlMessage)
+		if err := cm.Parse(m.OOB[:m.NN]); err != nil {
+			return 0, nil, nil, &net.OpError{Op: "read", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Err: err}
+		}
+		cm.Src = netAddrToIP16(m.Addr)
+	}
+	return m.N, cm, m.Addr, nil
+}
+
+func (c *payloadHandler) writeTo(b []byte, cm *ControlMessage, dst net.Addr) (int, error) {
+	m := socket.Message{
+		Buffers: [][]byte{b},
+		OOB:     cm.Marshal(),
+		Addr:    dst,
+	}
+	err := c.SendMsg(&m, 0)
+	if err != nil {
+		err = &net.OpError{Op: "write", Net: c.PacketConn.LocalAddr().Network(), Source: c.PacketConn.LocalAddr(), Addr: opAddr(dst), Err: err}
+	}
+	return m.N, err
+}
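These readFrom/writeTo implementations sit behind PacketConn.ReadFrom and WriteTo once control-message flags are enabled. A minimal sketch of that flow over UDP (addresses are placeholders, and the read blocks until a datagram arrives):

package main

import (
	"log"
	"net"

	"golang.org/x/net/ipv6"
)

func main() {
	c, err := net.ListenPacket("udp6", "[::1]:0")
	if err != nil {
		log.Fatal(err)
	}
	defer c.Close()
	p := ipv6.NewPacketConn(c)

	// Ask the kernel to deliver hop limit, source/destination and interface info.
	cf := ipv6.FlagHopLimit | ipv6.FlagSrc | ipv6.FlagDst | ipv6.FlagInterface
	if err := p.SetControlMessage(cf, true); err != nil {
		log.Println(err) // not supported on every platform
	}

	b := make([]byte, 1500)
	n, cm, src, err := p.ReadFrom(b)
	if err != nil {
		log.Fatal(err)
	}
	log.Printf("%d bytes from %v, control message: %v", n, src, cm)
}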

+ 41 - 0
vendor/golang.org/x/net/ipv6/payload_nocmsg.go

@@ -0,0 +1,41 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build nacl plan9 windows
+
+package ipv6
+
+import (
+	"net"
+	"syscall"
+)
+
+// ReadFrom reads a payload of the received IPv6 datagram, from the
+// endpoint c, copying the payload into b. It returns the number of
+// bytes copied into b, the control message cm and the source address
+// src of the received datagram.
+func (c *payloadHandler) ReadFrom(b []byte) (n int, cm *ControlMessage, src net.Addr, err error) {
+	if !c.ok() {
+		return 0, nil, nil, syscall.EINVAL
+	}
+	if n, src, err = c.PacketConn.ReadFrom(b); err != nil {
+		return 0, nil, nil, err
+	}
+	return
+}
+
+// WriteTo writes a payload of the IPv6 datagram, to the destination
+// address dst through the endpoint c, copying the payload from b. It
+// returns the number of bytes written. The control message cm allows
+// the IPv6 header fields and the datagram path to be specified. The
+// cm may be nil if control of the outgoing datagram is not required.
+func (c *payloadHandler) WriteTo(b []byte, cm *ControlMessage, dst net.Addr) (n int, err error) {
+	if !c.ok() {
+		return 0, syscall.EINVAL
+	}
+	if dst == nil {
+		return 0, errMissingAddress
+	}
+	return c.PacketConn.WriteTo(b, dst)
+}

+ 43 - 0
vendor/golang.org/x/net/ipv6/sockopt.go

@@ -0,0 +1,43 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import "golang.org/x/net/internal/socket"
+
+// Sticky socket options
+const (
+	ssoTrafficClass        = iota // header field for unicast packet, RFC 3542
+	ssoHopLimit                   // header field for unicast packet, RFC 3493
+	ssoMulticastInterface         // outbound interface for multicast packet, RFC 3493
+	ssoMulticastHopLimit          // header field for multicast packet, RFC 3493
+	ssoMulticastLoopback          // loopback for multicast packet, RFC 3493
+	ssoReceiveTrafficClass        // header field on received packet, RFC 3542
+	ssoReceiveHopLimit            // header field on received packet, RFC 2292 or 3542
+	ssoReceivePacketInfo          // inbound or outbound packet path, RFC 2292 or 3542
+	ssoReceivePathMTU             // path mtu, RFC 3542
+	ssoPathMTU                    // path mtu, RFC 3542
+	ssoChecksum                   // packet checksum, RFC 2292 or 3542
+	ssoICMPFilter                 // icmp filter, RFC 2292 or 3542
+	ssoJoinGroup                  // any-source multicast, RFC 3493
+	ssoLeaveGroup                 // any-source multicast, RFC 3493
+	ssoJoinSourceGroup            // source-specific multicast
+	ssoLeaveSourceGroup           // source-specific multicast
+	ssoBlockSourceGroup           // any-source or source-specific multicast
+	ssoUnblockSourceGroup         // any-source or source-specific multicast
+	ssoAttachFilter               // attach BPF for filtering inbound traffic
+)
+
+// Sticky socket option value types
+const (
+	ssoTypeIPMreq = iota + 1
+	ssoTypeGroupReq
+	ssoTypeGroupSourceReq
+)
+
+// A sockOpt represents a binding for sticky socket option.
+type sockOpt struct {
+	socket.Option
+	typ int // hint for option value type; optional
+}

+ 87 - 0
vendor/golang.org/x/net/ipv6/sockopt_posix.go

@@ -0,0 +1,87 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris windows
+
+package ipv6
+
+import (
+	"net"
+	"unsafe"
+
+	"golang.org/x/net/bpf"
+	"golang.org/x/net/internal/socket"
+)
+
+func (so *sockOpt) getMulticastInterface(c *socket.Conn) (*net.Interface, error) {
+	n, err := so.GetInt(c)
+	if err != nil {
+		return nil, err
+	}
+	return net.InterfaceByIndex(n)
+}
+
+func (so *sockOpt) setMulticastInterface(c *socket.Conn, ifi *net.Interface) error {
+	var n int
+	if ifi != nil {
+		n = ifi.Index
+	}
+	return so.SetInt(c, n)
+}
+
+func (so *sockOpt) getICMPFilter(c *socket.Conn) (*ICMPFilter, error) {
+	b := make([]byte, so.Len)
+	n, err := so.Get(c, b)
+	if err != nil {
+		return nil, err
+	}
+	if n != sizeofICMPv6Filter {
+		return nil, errOpNoSupport
+	}
+	return (*ICMPFilter)(unsafe.Pointer(&b[0])), nil
+}
+
+func (so *sockOpt) setICMPFilter(c *socket.Conn, f *ICMPFilter) error {
+	b := (*[sizeofICMPv6Filter]byte)(unsafe.Pointer(f))[:sizeofICMPv6Filter]
+	return so.Set(c, b)
+}
+
+func (so *sockOpt) getMTUInfo(c *socket.Conn) (*net.Interface, int, error) {
+	b := make([]byte, so.Len)
+	n, err := so.Get(c, b)
+	if err != nil {
+		return nil, 0, err
+	}
+	if n != sizeofIPv6Mtuinfo {
+		return nil, 0, errOpNoSupport
+	}
+	mi := (*ipv6Mtuinfo)(unsafe.Pointer(&b[0]))
+	if mi.Addr.Scope_id == 0 {
+		return nil, int(mi.Mtu), nil
+	}
+	ifi, err := net.InterfaceByIndex(int(mi.Addr.Scope_id))
+	if err != nil {
+		return nil, 0, err
+	}
+	return ifi, int(mi.Mtu), nil
+}
+
+func (so *sockOpt) setGroup(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
+	switch so.typ {
+	case ssoTypeIPMreq:
+		return so.setIPMreq(c, ifi, grp)
+	case ssoTypeGroupReq:
+		return so.setGroupReq(c, ifi, grp)
+	default:
+		return errOpNoSupport
+	}
+}
+
+func (so *sockOpt) setSourceGroup(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
+	return so.setGroupSourceReq(c, ifi, grp, src)
+}
+
+func (so *sockOpt) setBPF(c *socket.Conn, f []bpf.RawInstruction) error {
+	return so.setAttachFilter(c, f)
+}
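The getters and setters above move fixed-size kernel structs through the generic socket.Option Get/Set calls as raw bytes via unsafe.Pointer. A tiny standalone sketch of that struct-to-bytes view; the struct here is a stand-in, not the package's own type.

package main

import (
	"fmt"
	"unsafe"
)

// fakeFilter is a stand-in with the same shape as an ICMPv6 filter bitmap.
type fakeFilter struct {
	Data [8]uint32
}

func main() {
	var f fakeFilter
	f.Data[0] = 1
	// Reinterpret the struct as its in-memory bytes, the same trick as
	// (*[sizeofICMPv6Filter]byte)(unsafe.Pointer(f)) in sockopt_posix.go.
	b := (*[unsafe.Sizeof(fakeFilter{})]byte)(unsafe.Pointer(&f))[:]
	fmt.Println(len(b), b[:4]) // 32 bytes; the first word's byte order depends on endianness
}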

+ 46 - 0
vendor/golang.org/x/net/ipv6/sockopt_stub.go

@@ -0,0 +1,46 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows
+
+package ipv6
+
+import (
+	"net"
+
+	"golang.org/x/net/bpf"
+	"golang.org/x/net/internal/socket"
+)
+
+func (so *sockOpt) getMulticastInterface(c *socket.Conn) (*net.Interface, error) {
+	return nil, errOpNoSupport
+}
+
+func (so *sockOpt) setMulticastInterface(c *socket.Conn, ifi *net.Interface) error {
+	return errOpNoSupport
+}
+
+func (so *sockOpt) getICMPFilter(c *socket.Conn) (*ICMPFilter, error) {
+	return nil, errOpNoSupport
+}
+
+func (so *sockOpt) setICMPFilter(c *socket.Conn, f *ICMPFilter) error {
+	return errOpNoSupport
+}
+
+func (so *sockOpt) getMTUInfo(c *socket.Conn) (*net.Interface, int, error) {
+	return nil, 0, errOpNoSupport
+}
+
+func (so *sockOpt) setGroup(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
+	return errOpNoSupport
+}
+
+func (so *sockOpt) setSourceGroup(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
+	return errOpNoSupport
+}
+
+func (so *sockOpt) setBPF(c *socket.Conn, f []bpf.RawInstruction) error {
+	return errOpNoSupport
+}

+ 24 - 0
vendor/golang.org/x/net/ipv6/sys_asmreq.go

@@ -0,0 +1,24 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris windows
+
+package ipv6
+
+import (
+	"net"
+	"unsafe"
+
+	"golang.org/x/net/internal/socket"
+)
+
+func (so *sockOpt) setIPMreq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
+	var mreq ipv6Mreq
+	copy(mreq.Multiaddr[:], grp)
+	if ifi != nil {
+		mreq.setIfindex(ifi.Index)
+	}
+	b := (*[sizeofIPv6Mreq]byte)(unsafe.Pointer(&mreq))[:sizeofIPv6Mreq]
+	return so.Set(c, b)
+}

+ 17 - 0
vendor/golang.org/x/net/ipv6/sys_asmreq_stub.go

@@ -0,0 +1,17 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows
+
+package ipv6
+
+import (
+	"net"
+
+	"golang.org/x/net/internal/socket"
+)
+
+func (so *sockOpt) setIPMreq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
+	return errOpNoSupport
+}

+ 23 - 0
vendor/golang.org/x/net/ipv6/sys_bpf.go

@@ -0,0 +1,23 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux
+
+package ipv6
+
+import (
+	"unsafe"
+
+	"golang.org/x/net/bpf"
+	"golang.org/x/net/internal/socket"
+)
+
+func (so *sockOpt) setAttachFilter(c *socket.Conn, f []bpf.RawInstruction) error {
+	prog := sockFProg{
+		Len:    uint16(len(f)),
+		Filter: (*sockFilter)(unsafe.Pointer(&f[0])),
+	}
+	b := (*[sizeofSockFprog]byte)(unsafe.Pointer(&prog))[:sizeofSockFprog]
+	return so.Set(c, b)
+}

+ 16 - 0
vendor/golang.org/x/net/ipv6/sys_bpf_stub.go

@@ -0,0 +1,16 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !linux
+
+package ipv6
+
+import (
+	"golang.org/x/net/bpf"
+	"golang.org/x/net/internal/socket"
+)
+
+func (so *sockOpt) setAttachFilter(c *socket.Conn, f []bpf.RawInstruction) error {
+	return errOpNoSupport
+}

+ 57 - 0
vendor/golang.org/x/net/ipv6/sys_bsd.go

@@ -0,0 +1,57 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build dragonfly netbsd openbsd
+
+package ipv6
+
+import (
+	"net"
+	"syscall"
+
+	"golang.org/x/net/internal/iana"
+	"golang.org/x/net/internal/socket"
+)
+
+var (
+	ctlOpts = [ctlMax]ctlOpt{
+		ctlTrafficClass: {sysIPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
+		ctlHopLimit:     {sysIPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
+		ctlPacketInfo:   {sysIPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
+		ctlNextHop:      {sysIPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop},
+		ctlPathMTU:      {sysIPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
+	}
+
+	sockOpts = map[int]*sockOpt{
+		ssoTrafficClass:        {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_TCLASS, Len: 4}},
+		ssoHopLimit:            {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_UNICAST_HOPS, Len: 4}},
+		ssoMulticastInterface:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_IF, Len: 4}},
+		ssoMulticastHopLimit:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_HOPS, Len: 4}},
+		ssoMulticastLoopback:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_LOOP, Len: 4}},
+		ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVTCLASS, Len: 4}},
+		ssoReceiveHopLimit:     {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVHOPLIMIT, Len: 4}},
+		ssoReceivePacketInfo:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPKTINFO, Len: 4}},
+		ssoReceivePathMTU:      {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPATHMTU, Len: 4}},
+		ssoPathMTU:             {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
+		ssoChecksum:            {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_CHECKSUM, Len: 4}},
+		ssoICMPFilter:          {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: sysICMP6_FILTER, Len: sizeofICMPv6Filter}},
+		ssoJoinGroup:           {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_JOIN_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
+		ssoLeaveGroup:          {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_LEAVE_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
+	}
+)
+
+func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], ip)
+	sa.Scope_id = uint32(i)
+}
+
+func (pi *inet6Pktinfo) setIfindex(i int) {
+	pi.Ifindex = uint32(i)
+}
+
+func (mreq *ipv6Mreq) setIfindex(i int) {
+	mreq.Interface = uint32(i)
+}

+ 106 - 0
vendor/golang.org/x/net/ipv6/sys_darwin.go

@@ -0,0 +1,106 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"net"
+	"strconv"
+	"strings"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/net/internal/iana"
+	"golang.org/x/net/internal/socket"
+)
+
+var (
+	ctlOpts = [ctlMax]ctlOpt{
+		ctlHopLimit:   {sysIPV6_2292HOPLIMIT, 4, marshal2292HopLimit, parseHopLimit},
+		ctlPacketInfo: {sysIPV6_2292PKTINFO, sizeofInet6Pktinfo, marshal2292PacketInfo, parsePacketInfo},
+	}
+
+	sockOpts = map[int]*sockOpt{
+		ssoHopLimit:           {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_UNICAST_HOPS, Len: 4}},
+		ssoMulticastInterface: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_IF, Len: 4}},
+		ssoMulticastHopLimit:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_HOPS, Len: 4}},
+		ssoMulticastLoopback:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_LOOP, Len: 4}},
+		ssoReceiveHopLimit:    {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_2292HOPLIMIT, Len: 4}},
+		ssoReceivePacketInfo:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_2292PKTINFO, Len: 4}},
+		ssoChecksum:           {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_CHECKSUM, Len: 4}},
+		ssoICMPFilter:         {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: sysICMP6_FILTER, Len: sizeofICMPv6Filter}},
+		ssoJoinGroup:          {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_JOIN_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
+		ssoLeaveGroup:         {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_LEAVE_GROUP, Len: sizeofIPv6Mreq}, typ: ssoTypeIPMreq},
+	}
+)
+
+func init() {
+	// kern.osreldate seems to be hidden on the latest OS X, so we use
+	// kern.osrelease instead.
+	s, err := syscall.Sysctl("kern.osrelease")
+	if err != nil {
+		return
+	}
+	ss := strings.Split(s, ".")
+	if len(ss) == 0 {
+		return
+	}
+	// The IP_PKTINFO and protocol-independent multicast API were
+	// introduced in OS X 10.7 (Darwin 11). But it looks like
+	// those features require OS X 10.8 (Darwin 12) or above.
+	// See http://support.apple.com/kb/HT1633.
+	if mjver, err := strconv.Atoi(ss[0]); err != nil || mjver < 12 {
+		return
+	}
+	ctlOpts[ctlTrafficClass] = ctlOpt{sysIPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass}
+	ctlOpts[ctlHopLimit] = ctlOpt{sysIPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit}
+	ctlOpts[ctlPacketInfo] = ctlOpt{sysIPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo}
+	ctlOpts[ctlNextHop] = ctlOpt{sysIPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop}
+	ctlOpts[ctlPathMTU] = ctlOpt{sysIPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU}
+	sockOpts[ssoTrafficClass] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_TCLASS, Len: 4}}
+	sockOpts[ssoReceiveTrafficClass] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVTCLASS, Len: 4}}
+	sockOpts[ssoReceiveHopLimit] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVHOPLIMIT, Len: 4}}
+	sockOpts[ssoReceivePacketInfo] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPKTINFO, Len: 4}}
+	sockOpts[ssoReceivePathMTU] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPATHMTU, Len: 4}}
+	sockOpts[ssoPathMTU] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}}
+	sockOpts[ssoJoinGroup] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq}
+	sockOpts[ssoLeaveGroup] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq}
+	sockOpts[ssoJoinSourceGroup] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq}
+	sockOpts[ssoLeaveSourceGroup] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq}
+	sockOpts[ssoBlockSourceGroup] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq}
+	sockOpts[ssoUnblockSourceGroup] = &sockOpt{Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq}
+}
+
+func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], ip)
+	sa.Scope_id = uint32(i)
+}
+
+func (pi *inet6Pktinfo) setIfindex(i int) {
+	pi.Ifindex = uint32(i)
+}
+
+func (mreq *ipv6Mreq) setIfindex(i int) {
+	mreq.Interface = uint32(i)
+}
+
+func (gr *groupReq) setGroup(grp net.IP) {
+	sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gr)) + 4))
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], grp)
+}
+
+func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
+	sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 4))
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], grp)
+	sa = (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 132))
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], src)
+}

+ 92 - 0
vendor/golang.org/x/net/ipv6/sys_freebsd.go

@@ -0,0 +1,92 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"net"
+	"runtime"
+	"strings"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/net/internal/iana"
+	"golang.org/x/net/internal/socket"
+)
+
+var (
+	ctlOpts = [ctlMax]ctlOpt{
+		ctlTrafficClass: {sysIPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
+		ctlHopLimit:     {sysIPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
+		ctlPacketInfo:   {sysIPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
+		ctlNextHop:      {sysIPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop},
+		ctlPathMTU:      {sysIPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
+	}
+
+	sockOpts = map[int]*sockOpt{
+		ssoTrafficClass:        {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_TCLASS, Len: 4}},
+		ssoHopLimit:            {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_UNICAST_HOPS, Len: 4}},
+		ssoMulticastInterface:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_IF, Len: 4}},
+		ssoMulticastHopLimit:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_HOPS, Len: 4}},
+		ssoMulticastLoopback:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_LOOP, Len: 4}},
+		ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVTCLASS, Len: 4}},
+		ssoReceiveHopLimit:     {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVHOPLIMIT, Len: 4}},
+		ssoReceivePacketInfo:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPKTINFO, Len: 4}},
+		ssoReceivePathMTU:      {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPATHMTU, Len: 4}},
+		ssoPathMTU:             {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
+		ssoChecksum:            {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_CHECKSUM, Len: 4}},
+		ssoICMPFilter:          {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: sysICMP6_FILTER, Len: sizeofICMPv6Filter}},
+		ssoJoinGroup:           {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
+		ssoLeaveGroup:          {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
+		ssoJoinSourceGroup:     {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoLeaveSourceGroup:    {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoBlockSourceGroup:    {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoUnblockSourceGroup:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+	}
+)
+
+func init() {
+	if runtime.GOOS == "freebsd" && runtime.GOARCH == "386" {
+		archs, _ := syscall.Sysctl("kern.supported_archs")
+		for _, s := range strings.Fields(archs) {
+			if s == "amd64" {
+				freebsd32o64 = true
+				break
+			}
+		}
+	}
+}
+
+func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], ip)
+	sa.Scope_id = uint32(i)
+}
+
+func (pi *inet6Pktinfo) setIfindex(i int) {
+	pi.Ifindex = uint32(i)
+}
+
+func (mreq *ipv6Mreq) setIfindex(i int) {
+	mreq.Interface = uint32(i)
+}
+
+func (gr *groupReq) setGroup(grp net.IP) {
+	sa := (*sockaddrInet6)(unsafe.Pointer(&gr.Group))
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], grp)
+}
+
+func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
+	sa := (*sockaddrInet6)(unsafe.Pointer(&gsr.Group))
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], grp)
+	sa = (*sockaddrInet6)(unsafe.Pointer(&gsr.Source))
+	sa.Len = sizeofSockaddrInet6
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], src)
+}

+ 74 - 0
vendor/golang.org/x/net/ipv6/sys_linux.go

@@ -0,0 +1,74 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"net"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/net/internal/iana"
+	"golang.org/x/net/internal/socket"
+)
+
+var (
+	ctlOpts = [ctlMax]ctlOpt{
+		ctlTrafficClass: {sysIPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
+		ctlHopLimit:     {sysIPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
+		ctlPacketInfo:   {sysIPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
+		ctlPathMTU:      {sysIPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
+	}
+
+	sockOpts = map[int]*sockOpt{
+		ssoTrafficClass:        {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_TCLASS, Len: 4}},
+		ssoHopLimit:            {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_UNICAST_HOPS, Len: 4}},
+		ssoMulticastInterface:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_IF, Len: 4}},
+		ssoMulticastHopLimit:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_HOPS, Len: 4}},
+		ssoMulticastLoopback:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_LOOP, Len: 4}},
+		ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVTCLASS, Len: 4}},
+		ssoReceiveHopLimit:     {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVHOPLIMIT, Len: 4}},
+		ssoReceivePacketInfo:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPKTINFO, Len: 4}},
+		ssoReceivePathMTU:      {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPATHMTU, Len: 4}},
+		ssoPathMTU:             {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
+		ssoChecksum:            {Option: socket.Option{Level: iana.ProtocolReserved, Name: sysIPV6_CHECKSUM, Len: 4}},
+		ssoICMPFilter:          {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: sysICMPV6_FILTER, Len: sizeofICMPv6Filter}},
+		ssoJoinGroup:           {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
+		ssoLeaveGroup:          {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
+		ssoJoinSourceGroup:     {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoLeaveSourceGroup:    {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoBlockSourceGroup:    {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoUnblockSourceGroup:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoAttachFilter:        {Option: socket.Option{Level: sysSOL_SOCKET, Name: sysSO_ATTACH_FILTER, Len: sizeofSockFprog}},
+	}
+)
+
+func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], ip)
+	sa.Scope_id = uint32(i)
+}
+
+func (pi *inet6Pktinfo) setIfindex(i int) {
+	pi.Ifindex = int32(i)
+}
+
+func (mreq *ipv6Mreq) setIfindex(i int) {
+	mreq.Ifindex = int32(i)
+}
+
+func (gr *groupReq) setGroup(grp net.IP) {
+	sa := (*sockaddrInet6)(unsafe.Pointer(&gr.Group))
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], grp)
+}
+
+func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
+	sa := (*sockaddrInet6)(unsafe.Pointer(&gsr.Group))
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], grp)
+	sa = (*sockaddrInet6)(unsafe.Pointer(&gsr.Source))
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], src)
+}

+ 74 - 0
vendor/golang.org/x/net/ipv6/sys_solaris.go

@@ -0,0 +1,74 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ipv6
+
+import (
+	"net"
+	"syscall"
+	"unsafe"
+
+	"golang.org/x/net/internal/iana"
+	"golang.org/x/net/internal/socket"
+)
+
+var (
+	ctlOpts = [ctlMax]ctlOpt{
+		ctlTrafficClass: {sysIPV6_TCLASS, 4, marshalTrafficClass, parseTrafficClass},
+		ctlHopLimit:     {sysIPV6_HOPLIMIT, 4, marshalHopLimit, parseHopLimit},
+		ctlPacketInfo:   {sysIPV6_PKTINFO, sizeofInet6Pktinfo, marshalPacketInfo, parsePacketInfo},
+		ctlNextHop:      {sysIPV6_NEXTHOP, sizeofSockaddrInet6, marshalNextHop, parseNextHop},
+		ctlPathMTU:      {sysIPV6_PATHMTU, sizeofIPv6Mtuinfo, marshalPathMTU, parsePathMTU},
+	}
+
+	sockOpts = map[int]*sockOpt{
+		ssoTrafficClass:        {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_TCLASS, Len: 4}},
+		ssoHopLimit:            {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_UNICAST_HOPS, Len: 4}},
+		ssoMulticastInterface:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_IF, Len: 4}},
+		ssoMulticastHopLimit:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_HOPS, Len: 4}},
+		ssoMulticastLoopback:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_MULTICAST_LOOP, Len: 4}},
+		ssoReceiveTrafficClass: {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVTCLASS, Len: 4}},
+		ssoReceiveHopLimit:     {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVHOPLIMIT, Len: 4}},
+		ssoReceivePacketInfo:   {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPKTINFO, Len: 4}},
+		ssoReceivePathMTU:      {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_RECVPATHMTU, Len: 4}},
+		ssoPathMTU:             {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_PATHMTU, Len: sizeofIPv6Mtuinfo}},
+		ssoChecksum:            {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysIPV6_CHECKSUM, Len: 4}},
+		ssoICMPFilter:          {Option: socket.Option{Level: iana.ProtocolIPv6ICMP, Name: sysICMP6_FILTER, Len: sizeofICMPv6Filter}},
+		ssoJoinGroup:           {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_JOIN_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
+		ssoLeaveGroup:          {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_LEAVE_GROUP, Len: sizeofGroupReq}, typ: ssoTypeGroupReq},
+		ssoJoinSourceGroup:     {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_JOIN_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoLeaveSourceGroup:    {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_LEAVE_SOURCE_GROUP, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoBlockSourceGroup:    {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_BLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+		ssoUnblockSourceGroup:  {Option: socket.Option{Level: iana.ProtocolIPv6, Name: sysMCAST_UNBLOCK_SOURCE, Len: sizeofGroupSourceReq}, typ: ssoTypeGroupSourceReq},
+	}
+)
+
+func (sa *sockaddrInet6) setSockaddr(ip net.IP, i int) {
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], ip)
+	sa.Scope_id = uint32(i)
+}
+
+func (pi *inet6Pktinfo) setIfindex(i int) {
+	pi.Ifindex = uint32(i)
+}
+
+func (mreq *ipv6Mreq) setIfindex(i int) {
+	mreq.Interface = uint32(i)
+}
+
+func (gr *groupReq) setGroup(grp net.IP) {
+	sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gr)) + 4))
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], grp)
+}
+
+func (gsr *groupSourceReq) setSourceGroup(grp, src net.IP) {
+	sa := (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 4))
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], grp)
+	sa = (*sockaddrInet6)(unsafe.Pointer(uintptr(unsafe.Pointer(gsr)) + 260))
+	sa.Family = syscall.AF_INET6
+	copy(sa.Addr[:], src)
+}

+ 54 - 0
vendor/golang.org/x/net/ipv6/sys_ssmreq.go

@@ -0,0 +1,54 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin freebsd linux solaris
+
+package ipv6
+
+import (
+	"net"
+	"unsafe"
+
+	"golang.org/x/net/internal/socket"
+)
+
+var freebsd32o64 bool
+
+func (so *sockOpt) setGroupReq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
+	var gr groupReq
+	if ifi != nil {
+		gr.Interface = uint32(ifi.Index)
+	}
+	gr.setGroup(grp)
+	var b []byte
+	if freebsd32o64 {
+		var d [sizeofGroupReq + 4]byte
+		s := (*[sizeofGroupReq]byte)(unsafe.Pointer(&gr))
+		copy(d[:4], s[:4])
+		copy(d[8:], s[4:])
+		b = d[:]
+	} else {
+		b = (*[sizeofGroupReq]byte)(unsafe.Pointer(&gr))[:sizeofGroupReq]
+	}
+	return so.Set(c, b)
+}
+
+func (so *sockOpt) setGroupSourceReq(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
+	var gsr groupSourceReq
+	if ifi != nil {
+		gsr.Interface = uint32(ifi.Index)
+	}
+	gsr.setSourceGroup(grp, src)
+	var b []byte
+	if freebsd32o64 {
+		var d [sizeofGroupSourceReq + 4]byte
+		s := (*[sizeofGroupSourceReq]byte)(unsafe.Pointer(&gsr))
+		copy(d[:4], s[:4])
+		copy(d[8:], s[4:])
+		b = d[:]
+	} else {
+		b = (*[sizeofGroupSourceReq]byte)(unsafe.Pointer(&gsr))[:sizeofGroupSourceReq]
+	}
+	return so.Set(c, b)
+}

+ 21 - 0
vendor/golang.org/x/net/ipv6/sys_ssmreq_stub.go

@@ -0,0 +1,21 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !darwin,!freebsd,!linux,!solaris
+
+package ipv6
+
+import (
+	"net"
+
+	"golang.org/x/net/internal/socket"
+)
+
+func (so *sockOpt) setGroupReq(c *socket.Conn, ifi *net.Interface, grp net.IP) error {
+	return errOpNoSupport
+}
+
+func (so *sockOpt) setGroupSourceReq(c *socket.Conn, ifi *net.Interface, grp, src net.IP) error {
+	return errOpNoSupport
+}

+ 13 - 0
vendor/golang.org/x/net/ipv6/sys_stub.go

@@ -0,0 +1,13 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows
+
+package ipv6
+
+var (
+	ctlOpts = [ctlMax]ctlOpt{}
+
+	sockOpts = map[int]*sockOpt{}
+)

Some files were not shown because too many files changed in this diff