parent
b50dee5a61
commit
9591185c8f
21
go.mod
21
go.mod
|
@ -16,14 +16,14 @@ require (
|
|||
gitea.com/macaron/session v0.0.0-20190821211443-122c47c5f705
|
||||
gitea.com/macaron/toolbox v0.0.0-20190822013122-05ff0fc766b7
|
||||
github.com/PuerkitoBio/goquery v1.5.0
|
||||
github.com/RoaringBitmap/roaring v0.4.7 // indirect
|
||||
github.com/RoaringBitmap/roaring v0.4.21 // indirect
|
||||
github.com/bgentry/speakeasy v0.1.0 // indirect
|
||||
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3
|
||||
github.com/blevesearch/bleve v0.8.1
|
||||
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 // indirect
|
||||
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f // indirect
|
||||
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc // indirect
|
||||
github.com/blevesearch/go-porterstemmer v1.0.2 // indirect
|
||||
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f // indirect
|
||||
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 // indirect
|
||||
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe // indirect
|
||||
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd // indirect
|
||||
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect
|
||||
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect
|
||||
github.com/cznic/strutil v0.0.0-20181122101858-275e90344537 // indirect
|
||||
|
@ -31,14 +31,13 @@ require (
|
|||
github.com/dgrijalva/jwt-go v3.2.0+incompatible
|
||||
github.com/editorconfig/editorconfig-core-go/v2 v2.1.1
|
||||
github.com/emirpasic/gods v1.12.0
|
||||
github.com/etcd-io/bbolt v1.3.2 // indirect
|
||||
github.com/etcd-io/bbolt v1.3.3 // indirect
|
||||
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a
|
||||
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect
|
||||
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect
|
||||
github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect
|
||||
github.com/gliderlabs/ssh v0.2.2
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd // indirect
|
||||
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e // indirect
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect
|
||||
github.com/go-openapi/jsonreference v0.19.3 // indirect
|
||||
github.com/go-openapi/runtime v0.19.5 // indirect
|
||||
github.com/go-redis/redis v6.15.2+incompatible
|
||||
|
@ -68,12 +67,10 @@ require (
|
|||
github.com/mattn/go-sqlite3 v1.11.0
|
||||
github.com/mcuadros/go-version v0.0.0-20190308113854-92cdf37c5b75
|
||||
github.com/microcosm-cc/bluemonday v0.0.0-20161012083705-f77f16ffc87a
|
||||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae // indirect
|
||||
github.com/msteinert/pam v0.0.0-20151204160544-02ccfbfaf0cc
|
||||
github.com/nfnt/resize v0.0.0-20160724205520-891127d8d1b5
|
||||
github.com/niklasfasching/go-org v0.1.8
|
||||
github.com/oliamb/cutter v0.2.2
|
||||
github.com/philhofer/fwd v1.0.0 // indirect
|
||||
github.com/pkg/errors v0.8.1
|
||||
github.com/pquerna/otp v0.0.0-20160912161815-54653902c20e
|
||||
github.com/prometheus/client_golang v1.1.0
|
||||
|
@ -90,19 +87,17 @@ require (
|
|||
github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 // indirect
|
||||
github.com/stretchr/testify v1.4.0
|
||||
github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 // indirect
|
||||
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 // indirect
|
||||
github.com/tstranex/u2f v1.0.0
|
||||
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1
|
||||
github.com/unknwon/com v0.0.0-20190804042917-757f69c95f3e
|
||||
github.com/unknwon/i18n v0.0.0-20190805065654-5c6446a380b6
|
||||
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141
|
||||
github.com/urfave/cli v1.20.0
|
||||
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 // indirect
|
||||
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53
|
||||
golang.org/x/crypto v0.0.0-20191117063200-497ca9f6d64f
|
||||
golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9
|
||||
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47
|
||||
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2
|
||||
golang.org/x/text v0.3.2
|
||||
golang.org/x/tools v0.0.0-20190910221609-7f5965fd7709 // indirect
|
||||
gopkg.in/alexcesaro/quotedprintable.v3 v3.0.0-20150716171945-2caba252f4dc // indirect
|
||||
|
|
55
go.sum
55
go.sum
|
@ -46,8 +46,8 @@ github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tN
|
|||
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
|
||||
github.com/RoaringBitmap/roaring v0.4.7 h1:eGUudvFzvF7Kxh7JjYvXfI1f7l22/2duFby7r5+d4oc=
|
||||
github.com/RoaringBitmap/roaring v0.4.7/go.mod h1:8khRDP4HmeXns4xIj9oGrKSz7XTQiJx2zgh7AcNke4w=
|
||||
github.com/RoaringBitmap/roaring v0.4.21 h1:WJ/zIlNX4wQZ9x8Ey33O1UaD9TCTakYsdLFSBcTwH+8=
|
||||
github.com/RoaringBitmap/roaring v0.4.21/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
|
||||
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
|
||||
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
|
||||
github.com/Unknwon/com v0.0.0-20190321035513-0fed4efef755/go.mod h1:voKvFVpXBJxdIPeqjoJuLK+UVcRlo/JLjeToGxPYu68=
|
||||
|
@ -72,14 +72,14 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
|||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
|
||||
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
|
||||
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3 h1:vinCy/rcjbtxWnMiw11CbMKcuyNi+y4L4MbZUpk7m4M=
|
||||
github.com/blevesearch/bleve v0.0.0-20190214220507-05d86ea8f6e3/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw=
|
||||
github.com/blevesearch/bleve v0.8.1 h1:20zBREtGe8dvBxCC+717SaxKcUVQOWk3/Fm75vabKpU=
|
||||
github.com/blevesearch/bleve v0.8.1/go.mod h1:Y2lmIkzV6mcNfAnAdOd+ZxHkHchhBfU/xroGIp61wfw=
|
||||
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3 h1:U6vnxZrTfItfiUiYx0lf/LgHjRSfaKK5QHSom3lEbnA=
|
||||
github.com/blevesearch/blevex v0.0.0-20180227211930-4b158bb555a3/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ=
|
||||
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f h1:J9ZVHbB2X6JNxbKw/f3Y4E9Xq+Ro+zPiivzgmi3RTvg=
|
||||
github.com/blevesearch/go-porterstemmer v0.0.0-20141230013033-23a2c8e5cf1f/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA=
|
||||
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc h1:7OfDAkuAGx71ruzOIFqCkHqGIsVZU0C7PMw5u1bIrwU=
|
||||
github.com/blevesearch/segment v0.0.0-20160105220820-db70c57796cc/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.2 h1:qe7n69gBd1OLY5sHKnxQHIbzn0LNJA4hpAf+5XDxV2I=
|
||||
github.com/blevesearch/go-porterstemmer v1.0.2/go.mod h1:haWQqFT3RdOGz7PJuM3or/pWNJS1pKkoZJWCkWu0DVA=
|
||||
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f h1:kqbi9lqXLLs+zfWlgo1PIiRQ86n33K1JKotjj4rSYOg=
|
||||
github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f/go.mod h1:IInt5XRvpiGE09KOk9mmCMLjHhydIhNPKPPFLFBB7L8=
|
||||
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26 h1:NGpwhs9FOwddM6TptNrq2ycby4s24TcppSe5uG4DA/Q=
|
||||
github.com/boombuler/barcode v0.0.0-20161226211916-fe0f26ff6d26/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
|
||||
github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668 h1:U/lr3Dgy4WK+hNk4tyD+nuGjpVLPEHuJSFXMw11/HPA=
|
||||
|
@ -92,6 +92,7 @@ github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkE
|
|||
github.com/coreos/bbolt v1.3.3/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk=
|
||||
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
|
||||
github.com/coreos/etcd v3.3.15+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
|
||||
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
|
||||
github.com/coreos/go-oidc v2.1.0+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
|
||||
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
|
||||
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
|
||||
|
@ -102,10 +103,11 @@ github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d h1:XMf4E1U+b
|
|||
github.com/couchbase/gomemcached v0.0.0-20190515232915-c4b4ca0eb21d/go.mod h1:srVSlQLB8iXBVXHgnqemxUXqN6FCvClgCMPCsjBDR7c=
|
||||
github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b h1:bZ9rKU2/V8sY+NulSfxDOnXTWcs1rySqdF1sVepihvo=
|
||||
github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b/go.mod h1:BQwMFlJzDjFDG3DJUdU0KORxn88UlsOULuxLExMh3Hs=
|
||||
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe h1:2o6Y7KMjJNsuMTF8f2H2eTKRhqH7+bQbjr+D+LnhE5M=
|
||||
github.com/couchbase/vellum v0.0.0-20190111184608-e91b68ff3efe/go.mod h1:prYTC8EgTu3gwbqJihkud9zRXISvyulAplQ6exdCo1g=
|
||||
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd h1:zeuJhcG3f8eePshH3KxkNE+Xtl53pVln9MOUPMyr/1w=
|
||||
github.com/couchbase/vellum v0.0.0-20190829182332-ef2e028c01fd/go.mod h1:xbc8Ff/oG7h2ejd7AlwOpfd+6QZntc92ygpAOfGwcKY=
|
||||
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7 h1:1XjEY/gnjQ+AfXef2U6dxCquhiRzkEpxZuWqs+QxTL8=
|
||||
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7/go.mod h1:mby/05p8HE5yHEAKiIH/555NoblMs7PtW6NrYshDruc=
|
||||
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
|
||||
github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY=
|
||||
github.com/cupcake/rdb v0.0.0-20161107195141-43ba34106c76/go.mod h1:vYwsqCOLxGiisLwp9rITslkFNpZD5rz43tf41QFkTWY=
|
||||
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d h1:SwD98825d6bdB+pEuTxWOXiSjBrHdOl/UVp75eI7JT8=
|
||||
|
@ -135,8 +137,8 @@ github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
|
|||
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
|
||||
github.com/emirpasic/gods v1.12.0 h1:QAUIPSaCu4G+POclxeqb3F+WPpdKqFGlw36+yOzGlrg=
|
||||
github.com/emirpasic/gods v1.12.0/go.mod h1:YfzfFFoVP/catgzJb4IKIqXjX78Ha8FMSDh3ymbK86o=
|
||||
github.com/etcd-io/bbolt v1.3.2 h1:RLRQ0TKLX7DlBRXAJHvbmXL17Q3KNnTBtZ9B6Qo+/Y0=
|
||||
github.com/etcd-io/bbolt v1.3.2/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
|
||||
github.com/etcd-io/bbolt v1.3.3 h1:gSJmxrs37LgTqR/oyJBWok6k6SvXEUerFTbltIhXkBM=
|
||||
github.com/etcd-io/bbolt v1.3.3/go.mod h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw=
|
||||
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a h1:M1bRpaZAn4GSsqu3hdK2R8H0AH9O6vqCTCbm2oAFGfE=
|
||||
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a/go.mod h1:MkKY/CB98aVE4VxO63X5vTQKUgcn+3XP15LMASe3lYs=
|
||||
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 h1:0JZ+dUmQeA8IIVUMzysrX4/AKuQwWhV2dYQuPZdvdSQ=
|
||||
|
@ -154,10 +156,11 @@ github.com/gliderlabs/ssh v0.2.2 h1:6zsha5zo/TWhRhwqCD3+EarCAgZ2yN28ipRnGPnwkI0=
|
|||
github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
|
||||
github.com/globalsign/mgo v0.0.0-20180905125535-1ca0a4f7cbcb/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q=
|
||||
github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd h1:r04MMPyLHj/QwZuMJ5+7tJcBr1AQjpiAK/rZWRrQT7o=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20180323001048-9f0cb55181dd/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
|
||||
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e h1:SiEs4J3BKVIeaWrH3tKaz3QLZhJ68iJ/A4xrzIoE5+Y=
|
||||
github.com/glycerine/goconvey v0.0.0-20190315024820-982ee783a72e/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqoVvjbiUioBBFUL5up+h+GdCa/AnJsL/1bIs/veSI=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
|
||||
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
|
||||
|
@ -279,6 +282,8 @@ github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORR
|
|||
github.com/gopherjs/gopherjs v0.0.0-20181103185306-d547d1d9531e/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||
github.com/gorilla/context v1.1.1 h1:AWwleXJkX/nhcU9bZSnZoi3h/qGYqQAGhq6zZe/aQW8=
|
||||
github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg=
|
||||
github.com/gorilla/handlers v1.4.2 h1:0QniY0USkHQ1RGCLfKxeNHK9bkDHGRYGNDFBCS+YARg=
|
||||
|
@ -304,6 +309,7 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
|
|||
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
|
||||
github.com/hpcloud/tail v1.0.0 h1:nfCOvKYfkgYP8hkirhJocXT2+zOD8yUNjXaWfTlyFKI=
|
||||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
||||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
|
||||
github.com/issue9/assert v1.3.2 h1:IaTa37u4m1fUuTH9K9ldO5IONKVDXjLiUO1T9vj0OF0=
|
||||
github.com/issue9/assert v1.3.2/go.mod h1:9Ger+iz8X7r1zMYYwEhh++2wMGWcNN2oVI+zIQXxcio=
|
||||
github.com/issue9/identicon v0.0.0-20160320065130-d36b54562f4c h1:A/PDn117UYld5mlxe58EpMguqpkeTMw5/FCo0ZPS/Ko=
|
||||
|
@ -467,6 +473,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qq
|
|||
github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg=
|
||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||
github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo=
|
||||
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
|
||||
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
|
||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
||||
|
@ -504,11 +512,13 @@ github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc=
|
|||
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
|
||||
github.com/spf13/cast v1.3.0 h1:oget//CVOEoFewqQxwr0Ej5yjygnqGkvggSE/gB35Q8=
|
||||
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
|
||||
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
|
||||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
|
||||
github.com/spf13/jwalterweatherman v1.1.0 h1:ue6voC5bR5F8YxI5S67j9i582FU4Qvo2bmqnqMYADFk=
|
||||
github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
|
||||
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
|
||||
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
|
||||
github.com/spf13/viper v1.4.0 h1:yXHLWeravcrgGyFSyCgdYpXQ9dR9c/WED3pg1RhxqEU=
|
||||
github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE=
|
||||
github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4=
|
||||
|
@ -529,8 +539,8 @@ github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481 h1:HOxvxvnntLiPn1
|
|||
github.com/tecbot/gorocksdb v0.0.0-20181010114359-8752a9433481/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8=
|
||||
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
|
||||
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
|
||||
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200 h1:ZVvr38DYEyOPyelySqvF0I9I++85NnUMsWkroBDS4fs=
|
||||
github.com/tinylib/msgp v0.0.0-20180516164116-c8cf64dff200/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
|
||||
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
|
||||
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
|
||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
|
||||
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ=
|
||||
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM=
|
||||
|
@ -538,6 +548,7 @@ github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ=
|
|||
github.com/tstranex/u2f v1.0.0/go.mod h1:eahSLaqAS0zsIEv80+vXT7WanXs7MQQDg3j3wGBSayo=
|
||||
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
|
||||
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
|
||||
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
|
||||
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
|
||||
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1 h1:SpoCl3+Pta5/ubQyF+Fmx65obtpfkyzeaOIneCE3MTw=
|
||||
github.com/unknwon/cae v0.0.0-20190822084630-55a0b64484a1/go.mod h1:QaSeRctcea9fK6piJpAMCCPKxzJ01+xFcr2k1m3WRPU=
|
||||
|
@ -549,8 +560,8 @@ github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 h1:Z79lyIznnziKA
|
|||
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141/go.mod h1:TBwoao3Q4Eb/cp+dHbXDfRTrZSsj/k7kLr2j1oWRWC0=
|
||||
github.com/urfave/cli v1.20.0 h1:fDqGv3UG/4jbVl/QkFwEdddtEDjh/5Ov6X+0B/3bPaw=
|
||||
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
|
||||
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621 h1:E8u341JM/N8LCnPXBV6ZFD1RKo/j+qHl1XOqSV+GstA=
|
||||
github.com/willf/bitset v0.0.0-20180426185212-8ce1146b8621/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
|
||||
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
|
||||
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
|
||||
github.com/xanzy/ssh-agent v0.2.1 h1:TCbipTQL2JiiCprBWx9frJ2eJlCYT00NmctrHxVAr70=
|
||||
github.com/xanzy/ssh-agent v0.2.1/go.mod h1:mLlQY/MoOhWBj+gOGMQkOeiEvkx+8pJSI+0Bx9h2kr4=
|
||||
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
|
||||
|
@ -574,6 +585,7 @@ go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/
|
|||
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
|
||||
golang.org/x/crypto v0.0.0-20180820150726-614d502a4dac/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.0.0-20190219172222-a4c6cb3142f2/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20190320223903-b7391e95e576/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
|
@ -642,6 +654,7 @@ golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5h
|
|||
golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190221075227-b4e8571b14e0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
|
@ -660,6 +673,8 @@ golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7w
|
|||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY=
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2 h1:/J2nHFg1MTqaRLFO7M+J78ASNsJoz3r0cvHBPQ77fsE=
|
||||
golang.org/x/sys v0.0.0-20191127021746-63cb32ae39b2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
kind: pipeline
|
||||
name: default
|
||||
|
||||
workspace:
|
||||
base: /go
|
||||
path: src/github.com/RoaringBitmap/roaring
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: golang
|
||||
commands:
|
||||
- go get -t
|
||||
- go test
|
||||
- go test -race -run TestConcurrent*
|
||||
- go build -tags appengine
|
||||
- go test -tags appengine
|
||||
- GOARCH=386 go build
|
||||
- GOARCH=386 go test
|
||||
- GOARCH=arm go build
|
||||
- GOARCH=arm64 go build
|
|
@ -8,10 +8,12 @@ install:
|
|||
notifications:
|
||||
email: false
|
||||
go:
|
||||
- 1.7.x
|
||||
- 1.8.x
|
||||
- 1.9.x
|
||||
- 1.10.x
|
||||
- "1.7.x"
|
||||
- "1.8.x"
|
||||
- "1.9.x"
|
||||
- "1.10.x"
|
||||
- "1.11.x"
|
||||
- "1.12.x"
|
||||
- tip
|
||||
|
||||
# whitelist
|
||||
|
@ -21,10 +23,14 @@ branches:
|
|||
script:
|
||||
- goveralls -v -service travis-ci -ignore arraycontainer_gen.go,bitmapcontainer_gen.go,rle16_gen.go,rle_gen.go,roaringarray_gen.go,rle.go || go test
|
||||
- go test -race -run TestConcurrent*
|
||||
- go build -tags appengine
|
||||
- go test -tags appengine
|
||||
- GOARCH=arm64 go build
|
||||
- GOARCH=386 go build
|
||||
- GOARCH=386 go test
|
||||
- GOARCH=arm go build
|
||||
- GOARCH=arm64 go build
|
||||
|
||||
matrix:
|
||||
allow_failures:
|
||||
- go: tip
|
||||
|
|
|
@ -7,4 +7,5 @@ Bob Potter (@bpot),
|
|||
Tyson Maly (@tvmaly),
|
||||
Will Glynn (@willglynn),
|
||||
Brent Pedersen (@brentp)
|
||||
Maciej Biłas (@maciej)
|
||||
Maciej Biłas (@maciej),
|
||||
Joe Nall (@joenall)
|
||||
|
|
|
@ -9,4 +9,8 @@ Will Glynn (@willglynn),
|
|||
Brent Pedersen (@brentp),
|
||||
Jason E. Aten (@glycerine),
|
||||
Vali Malinoiu (@0x4139),
|
||||
Forud Ghafouri (@fzerorubigd)
|
||||
Forud Ghafouri (@fzerorubigd),
|
||||
Joe Nall (@joenall),
|
||||
(@fredim),
|
||||
Edd Robinson (@e-dard),
|
||||
Alexander Petrov (@alldroll)
|
||||
|
|
|
@ -200,3 +200,36 @@
|
|||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
================================================================================
|
||||
|
||||
Portions of runcontainer.go are from the Go standard library, which is licensed
|
||||
under:
|
||||
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke rle backrle ser fetch-real-roaring-datasets
|
||||
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
|
||||
|
||||
|
||||
|
||||
|
@ -63,7 +63,7 @@ qa: fmtcheck test vet lint
|
|||
|
||||
# Get the dependencies
|
||||
deps:
|
||||
GOPATH=$(GOPATH) go get github.com/smartystreets/goconvey/convey
|
||||
GOPATH=$(GOPATH) go get github.com/stretchr/testify
|
||||
GOPATH=$(GOPATH) go get github.com/willf/bitset
|
||||
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
|
||||
GOPATH=$(GOPATH) go get github.com/mschoch/smat
|
||||
|
@ -97,18 +97,8 @@ nuke:
|
|||
rm -rf ./target
|
||||
GOPATH=$(GOPATH) go clean -i ./...
|
||||
|
||||
rle:
|
||||
cp rle.go rle16.go
|
||||
perl -pi -e 's/32/16/g' rle16.go
|
||||
cp rle_test.go rle16_test.go
|
||||
perl -pi -e 's/32/16/g' rle16_test.go
|
||||
|
||||
backrle:
|
||||
cp rle16.go rle.go
|
||||
perl -pi -e 's/16/32/g' rle.go
|
||||
perl -pi -e 's/2032/2016/g' rle.go
|
||||
|
||||
ser: rle
|
||||
ser:
|
||||
go generate
|
||||
|
||||
cover:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
roaring [![Build Status](https://travis-ci.org/RoaringBitmap/roaring.png)](https://travis-ci.org/RoaringBitmap/roaring) [![Coverage Status](https://coveralls.io/repos/github/RoaringBitmap/roaring/badge.svg?branch=master)](https://coveralls.io/github/RoaringBitmap/roaring?branch=master) [![GoDoc](https://godoc.org/github.com/RoaringBitmap/roaring?status.svg)](https://godoc.org/github.com/RoaringBitmap/roaring) [![Go Report Card](https://goreportcard.com/badge/RoaringBitmap/roaring)](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
|
||||
[![Build Status](https://cloud.drone.io/api/badges/RoaringBitmap/roaring/status.svg)](https://cloud.drone.io/RoaringBitmap/roaring)
|
||||
=============
|
||||
|
||||
This is a go version of the Roaring bitmap data structure.
|
||||
|
@ -6,12 +7,12 @@ This is a go version of the Roaring bitmap data structure.
|
|||
|
||||
|
||||
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
|
||||
[Elasticsearch][elasticsearch], [Metamarkets' Druid][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
|
||||
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [Cloud Torrent][cloudtorrent], [Whoosh][whoosh], [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin].
|
||||
|
||||
[lucene]: https://lucene.apache.org/
|
||||
[solr]: https://lucene.apache.org/solr/
|
||||
[elasticsearch]: https://www.elastic.co/products/elasticsearch
|
||||
[druid]: http://druid.io/
|
||||
[druid]: https://druid.apache.org/
|
||||
[spark]: https://spark.apache.org/
|
||||
[opensearchserver]: http://www.opensearchserver.com
|
||||
[cloudtorrent]: https://github.com/jpillora/cloud-torrent
|
||||
|
@ -61,7 +62,6 @@ http://arxiv.org/abs/1402.6407 This paper used data from http://lemire.me/data/r
|
|||
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
|
||||
|
||||
they include
|
||||
- github.com/smartystreets/goconvey/convey
|
||||
- github.com/willf/bitset
|
||||
- github.com/mschoch/smat
|
||||
- github.com/glycerine/go-unsnap-stream
|
||||
|
@ -133,6 +133,7 @@ func main() {
|
|||
if rb1.Equals(newrb) {
|
||||
fmt.Println("I wrote the content to a byte stream and read it back.")
|
||||
}
|
||||
// you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator()
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -206,7 +207,7 @@ You can use roaring with gore:
|
|||
|
||||
- go get -u github.com/motemen/gore
|
||||
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
|
||||
- go get github/RoaringBitmap/roaring
|
||||
- go get github.com/RoaringBitmap/roaring
|
||||
|
||||
```go
|
||||
$ gore
|
||||
|
|
|
@ -24,12 +24,16 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
|
|||
}
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) getShortIterator() shortIterable {
|
||||
func (ac *arrayContainer) getShortIterator() shortPeekable {
|
||||
return &shortIterator{ac.content, 0}
|
||||
}
|
||||
|
||||
// getReverseIterator builds a reverseIterator over ac.content whose starting
// index is the last element (len(ac.content) - 1). For an empty container the
// starting index is therefore -1.
// NOTE(review): presumably the iterator walks toward index 0 — confirm
// against the reverseIterator definition.
func (ac *arrayContainer) getReverseIterator() shortIterable {
|
||||
return &reverseIterator{ac.content, len(ac.content) - 1}
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) getManyIterator() manyIterable {
|
||||
return &manyIterator{ac.content, 0}
|
||||
return &shortIterator{ac.content, 0}
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) minimum() uint16 {
|
||||
|
@ -115,7 +119,6 @@ func (ac *arrayContainer) iremoveRange(firstOfRange, endx int) container {
|
|||
// flip the values in the range [firstOfRange,endx)
|
||||
func (ac *arrayContainer) not(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
//p("arrayContainer.not(): exiting early with ac.clone()")
|
||||
return ac.clone()
|
||||
}
|
||||
return ac.notClose(firstOfRange, endx-1) // remove everything in [firstOfRange,endx-1]
|
||||
|
@ -124,18 +127,15 @@ func (ac *arrayContainer) not(firstOfRange, endx int) container {
|
|||
// flip the values in the range [firstOfRange,lastOfRange]
|
||||
func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
|
||||
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
|
||||
//p("arrayContainer.notClose(): exiting early with ac.clone()")
|
||||
return ac.clone()
|
||||
}
|
||||
|
||||
// determine the span of array indices to be affected^M
|
||||
startIndex := binarySearch(ac.content, uint16(firstOfRange))
|
||||
//p("startIndex=%v", startIndex)
|
||||
if startIndex < 0 {
|
||||
startIndex = -startIndex - 1
|
||||
}
|
||||
lastIndex := binarySearch(ac.content, uint16(lastOfRange))
|
||||
//p("lastIndex=%v", lastIndex)
|
||||
if lastIndex < 0 {
|
||||
lastIndex = -lastIndex - 2
|
||||
}
|
||||
|
@ -144,9 +144,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
|
|||
newValuesInRange := spanToBeFlipped - currentValuesInRange
|
||||
cardinalityChange := newValuesInRange - currentValuesInRange
|
||||
newCardinality := len(ac.content) + cardinalityChange
|
||||
//p("new card is %v", newCardinality)
|
||||
if newCardinality > arrayDefaultMaxSize {
|
||||
//p("new card over arrayDefaultMaxSize, so returning bitmap")
|
||||
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
|
||||
}
|
||||
answer := newArrayContainer()
|
||||
|
@ -503,7 +501,6 @@ func (ac *arrayContainer) lazyorArray(value2 *arrayContainer) container {
|
|||
}
|
||||
|
||||
func (ac *arrayContainer) and(a container) container {
|
||||
//p("ac.and() called")
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return ac.andArray(x)
|
||||
|
@ -550,7 +547,7 @@ func (ac *arrayContainer) iand(a container) container {
|
|||
return ac.iandBitmap(x)
|
||||
case *runContainer16:
|
||||
if x.isFull() {
|
||||
return ac.clone()
|
||||
return ac
|
||||
}
|
||||
return x.andArray(ac)
|
||||
}
|
||||
|
@ -722,7 +719,6 @@ func (ac *arrayContainer) inot(firstOfRange, endx int) container {
|
|||
|
||||
// flip the values in the range [firstOfRange,lastOfRange]
|
||||
func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
|
||||
//p("ac.inotClose() starting")
|
||||
if firstOfRange > lastOfRange { // unlike add and remove, not uses an inclusive range [firstOfRange,lastOfRange]
|
||||
return ac
|
||||
}
|
||||
|
@ -745,7 +741,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
|
|||
if cardinalityChange > 0 {
|
||||
if newCardinality > len(ac.content) {
|
||||
if newCardinality > arrayDefaultMaxSize {
|
||||
//p("ac.inotClose() converting to bitmap and doing inot there")
|
||||
bcRet := ac.toBitmapContainer()
|
||||
bcRet.inot(firstOfRange, lastOfRange+1)
|
||||
*ac = *bcRet.toArrayContainer()
|
||||
|
@ -766,7 +761,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
|
|||
}
|
||||
}
|
||||
ac.content = ac.content[:newCardinality]
|
||||
//p("bottom of ac.inotClose(): returning ac")
|
||||
return ac
|
||||
}
|
||||
|
||||
|
@ -958,3 +952,17 @@ func (ac *arrayContainer) toEfficientContainer() container {
|
|||
func (ac *arrayContainer) containerType() contype {
|
||||
return arrayContype
|
||||
}
|
||||
|
||||
func (ac *arrayContainer) addOffset(x uint16) []container {
|
||||
low := &arrayContainer{}
|
||||
high := &arrayContainer{}
|
||||
for _, val := range ac.content {
|
||||
y := uint32(val) + uint32(x)
|
||||
if highbits(y) > 0 {
|
||||
high.content = append(high.content, lowbits(y))
|
||||
} else {
|
||||
low.content = append(low.content, lowbits(y))
|
||||
}
|
||||
}
|
||||
return []container{low, high}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ package roaring
|
|||
|
||||
import "github.com/tinylib/msgp/msgp"
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -49,7 +49,7 @@ func (z *arrayContainer) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 1
|
||||
// write "content"
|
||||
|
@ -70,7 +70,7 @@ func (z *arrayContainer) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 1
|
||||
|
@ -83,7 +83,7 @@ func (z *arrayContainer) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -127,7 +127,7 @@ func (z *arrayContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *arrayContainer) Msgsize() (s int) {
|
||||
s = 1 + 8 + msgp.ArrayHeaderSize + (len(z.content) * (msgp.Uint16Size))
|
||||
return
|
||||
|
|
|
@ -110,14 +110,54 @@ func (bcsi *bitmapContainerShortIterator) hasNext() bool {
|
|||
return bcsi.i >= 0
|
||||
}
|
||||
|
||||
// peekNext returns the iterator's current position (bcsi.i) converted to
// uint16, without modifying the iterator.
// NOTE(review): hasNext() reports bcsi.i >= 0, so when the iterator is
// exhausted bcsi.i is negative and this conversion wraps; callers are
// presumably expected to check hasNext() first — confirm.
func (bcsi *bitmapContainerShortIterator) peekNext() uint16 {
|
||||
return uint16(bcsi.i)
|
||||
}
|
||||
|
||||
func (bcsi *bitmapContainerShortIterator) advanceIfNeeded(minval uint16) {
|
||||
if bcsi.hasNext() && bcsi.peekNext() < minval {
|
||||
bcsi.i = bcsi.ptr.NextSetBit(int(minval))
|
||||
}
|
||||
}
|
||||
|
||||
// newBitmapContainerShortIterator builds a forward iterator over a whose
// initial position is the result of a.NextSetBit(0).
// NOTE(review): presumably that is the lowest set bit, or -1 for an empty
// container (which hasNext() then reports as exhausted) — confirm against
// NextSetBit.
func newBitmapContainerShortIterator(a *bitmapContainer) *bitmapContainerShortIterator {
|
||||
return &bitmapContainerShortIterator{a, a.NextSetBit(0)}
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) getShortIterator() shortIterable {
|
||||
func (bc *bitmapContainer) getShortIterator() shortPeekable {
|
||||
return newBitmapContainerShortIterator(bc)
|
||||
}
|
||||
|
||||
type reverseBitmapContainerShortIterator struct {
|
||||
ptr *bitmapContainer
|
||||
i int
|
||||
}
|
||||
|
||||
func (bcsi *reverseBitmapContainerShortIterator) next() uint16 {
|
||||
if bcsi.i == -1 {
|
||||
panic("reverseBitmapContainerShortIterator.next() going beyond what is available")
|
||||
}
|
||||
|
||||
j := bcsi.i
|
||||
bcsi.i = bcsi.ptr.PrevSetBit(bcsi.i - 1)
|
||||
return uint16(j)
|
||||
}
|
||||
|
||||
func (bcsi *reverseBitmapContainerShortIterator) hasNext() bool {
|
||||
return bcsi.i >= 0
|
||||
}
|
||||
|
||||
func newReverseBitmapContainerShortIterator(a *bitmapContainer) *reverseBitmapContainerShortIterator {
|
||||
if a.cardinality == 0 {
|
||||
return &reverseBitmapContainerShortIterator{a, -1}
|
||||
}
|
||||
return &reverseBitmapContainerShortIterator{a, int(a.maximum())}
|
||||
}
|
||||
|
||||
// getReverseIterator returns a new reverseBitmapContainerShortIterator over
// bc, exposed through the shortIterable interface.
func (bc *bitmapContainer) getReverseIterator() shortIterable {
|
||||
return newReverseBitmapContainerShortIterator(bc)
|
||||
}
|
||||
|
||||
type bitmapContainerManyIterator struct {
|
||||
ptr *bitmapContainer
|
||||
base int
|
||||
|
@ -131,7 +171,7 @@ func (bcmi *bitmapContainerManyIterator) nextMany(hs uint32, buf []uint32) int {
|
|||
|
||||
for n < len(buf) {
|
||||
if bitset == 0 {
|
||||
base += 1
|
||||
base++
|
||||
if base >= len(bcmi.ptr.bitmap) {
|
||||
bcmi.base = base
|
||||
bcmi.bitset = bitset
|
||||
|
@ -177,16 +217,13 @@ func bitmapContainerSizeInBytes() int {
|
|||
|
||||
// bitmapEquals reports whether a and b have the same length and hold the same
// word at every index. Two empty slices (nil or not) compare equal.
func bitmapEquals(a, b []uint64) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for idx := 0; idx < n; idx++ {
		if a[idx] != b[idx] {
			return false
		}
	}
	return true
}
|
||||
|
||||
|
@ -209,9 +246,7 @@ func (bc *bitmapContainer) fillLeastSignificant16bits(x []uint32, i int, mask ui
|
|||
func (bc *bitmapContainer) equals(o container) bool {
|
||||
srb, ok := o.(*bitmapContainer)
|
||||
if ok {
|
||||
//p("bitmapContainers.equals: both are bitmapContainers")
|
||||
if srb.cardinality != bc.cardinality {
|
||||
//p("bitmapContainers.equals: card differs: %v vs %v", srb.cardinality, bc.cardinality)
|
||||
return false
|
||||
}
|
||||
return bitmapEquals(bc.bitmap, srb.bitmap)
|
||||
|
@ -261,12 +296,6 @@ func (bc *bitmapContainer) iremoveReturnMinimized(i uint16) container {
|
|||
|
||||
// iremove returns true if i was found.
|
||||
func (bc *bitmapContainer) iremove(i uint16) bool {
|
||||
/* branchless code
|
||||
w := bc.bitmap[i>>6]
|
||||
mask := uint64(1) << (i % 64)
|
||||
neww := w &^ mask
|
||||
bc.cardinality -= int((w ^ neww) >> (i % 64))
|
||||
bc.bitmap[i>>6] = neww */
|
||||
if bc.contains(i) {
|
||||
bc.cardinality--
|
||||
bc.bitmap[i/64] &^= (uint64(1) << (i % 64))
|
||||
|
@ -306,14 +335,10 @@ func (bc *bitmapContainer) iremoveRange(firstOfRange, lastOfRange int) container
|
|||
|
||||
// flip all values in range [firstOfRange,endx)
|
||||
func (bc *bitmapContainer) inot(firstOfRange, endx int) container {
|
||||
p("bc.inot() called with [%v, %v)", firstOfRange, endx)
|
||||
if endx-firstOfRange == maxCapacity {
|
||||
//p("endx-firstOfRange == maxCapacity")
|
||||
flipBitmapRange(bc.bitmap, firstOfRange, endx)
|
||||
bc.cardinality = maxCapacity - bc.cardinality
|
||||
//p("bc.cardinality is now %v", bc.cardinality)
|
||||
} else if endx-firstOfRange > maxCapacity/2 {
|
||||
//p("endx-firstOfRange > maxCapacity/2")
|
||||
flipBitmapRange(bc.bitmap, firstOfRange, endx)
|
||||
bc.computeCardinality()
|
||||
} else {
|
||||
|
@ -517,11 +542,31 @@ func (bc *bitmapContainer) iorBitmap(value2 *bitmapContainer) container {
|
|||
func (bc *bitmapContainer) lazyIORArray(value2 *arrayContainer) container {
|
||||
answer := bc
|
||||
c := value2.getCardinality()
|
||||
for k := 0; k < c; k++ {
|
||||
for k := 0; k+3 < c; k += 4 {
|
||||
content := (*[4]uint16)(unsafe.Pointer(&value2.content[k]))
|
||||
vc0 := content[0]
|
||||
i0 := uint(vc0) >> 6
|
||||
answer.bitmap[i0] = answer.bitmap[i0] | (uint64(1) << (vc0 % 64))
|
||||
|
||||
vc1 := content[1]
|
||||
i1 := uint(vc1) >> 6
|
||||
answer.bitmap[i1] = answer.bitmap[i1] | (uint64(1) << (vc1 % 64))
|
||||
|
||||
vc2 := content[2]
|
||||
i2 := uint(vc2) >> 6
|
||||
answer.bitmap[i2] = answer.bitmap[i2] | (uint64(1) << (vc2 % 64))
|
||||
|
||||
vc3 := content[3]
|
||||
i3 := uint(vc3) >> 6
|
||||
answer.bitmap[i3] = answer.bitmap[i3] | (uint64(1) << (vc3 % 64))
|
||||
}
|
||||
|
||||
for k := c &^ 3; k < c; k++ {
|
||||
vc := value2.content[k]
|
||||
i := uint(vc) >> 6
|
||||
answer.bitmap[i] = answer.bitmap[i] | (uint64(1) << (vc % 64))
|
||||
}
|
||||
|
||||
answer.cardinality = invalidCardinality
|
||||
return answer
|
||||
}
|
||||
|
@ -789,8 +834,6 @@ func (bc *bitmapContainer) andNotRun16(rc *runContainer16) container {
|
|||
}
|
||||
|
||||
func (bc *bitmapContainer) iandNot(a container) container {
|
||||
//p("bitmapContainer.iandNot() starting")
|
||||
|
||||
switch x := a.(type) {
|
||||
case *arrayContainer:
|
||||
return bc.iandNotArray(x)
|
||||
|
@ -844,12 +887,15 @@ func (bc *bitmapContainer) andNotBitmap(value2 *bitmapContainer) container {
|
|||
return ac
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) *bitmapContainer {
|
||||
func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) container {
|
||||
newCardinality := int(popcntMaskSlice(bc.bitmap, value2.bitmap))
|
||||
for k := 0; k < len(bc.bitmap); k++ {
|
||||
bc.bitmap[k] = bc.bitmap[k] &^ value2.bitmap[k]
|
||||
}
|
||||
bc.cardinality = newCardinality
|
||||
if bc.getCardinality() <= arrayDefaultMaxSize {
|
||||
return bc.toArrayContainer()
|
||||
}
|
||||
return bc
|
||||
}
|
||||
|
||||
|
@ -917,6 +963,32 @@ func (bc *bitmapContainer) NextSetBit(i int) int {
|
|||
return -1
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) PrevSetBit(i int) int {
|
||||
if i < 0 {
|
||||
return -1
|
||||
}
|
||||
x := i / 64
|
||||
if x >= len(bc.bitmap) {
|
||||
return -1
|
||||
}
|
||||
|
||||
w := bc.bitmap[x]
|
||||
|
||||
b := i % 64
|
||||
|
||||
w = w << uint(63-b)
|
||||
if w != 0 {
|
||||
return i - countLeadingZeros(w)
|
||||
}
|
||||
x--
|
||||
for ; x >= 0; x-- {
|
||||
if bc.bitmap[x] != 0 {
|
||||
return (x * 64) + 63 - countLeadingZeros(bc.bitmap[x])
|
||||
}
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// reference the java implementation
|
||||
// https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/BitmapContainer.java#L875-L892
|
||||
//
|
||||
|
@ -980,3 +1052,35 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
|
|||
func (bc *bitmapContainer) containerType() contype {
|
||||
return bitmapContype
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) addOffset(x uint16) []container {
|
||||
low := newBitmapContainer()
|
||||
high := newBitmapContainer()
|
||||
b := uint32(x) >> 6
|
||||
i := uint32(x) % 64
|
||||
end := uint32(1024) - b
|
||||
if i == 0 {
|
||||
copy(low.bitmap[b:], bc.bitmap[:end])
|
||||
copy(high.bitmap[:b], bc.bitmap[end:])
|
||||
} else {
|
||||
low.bitmap[b] = bc.bitmap[0] << i
|
||||
for k := uint32(1); k < end; k++ {
|
||||
newval := bc.bitmap[k] << i
|
||||
if newval == 0 {
|
||||
newval = bc.bitmap[k-1] >> (64 - i)
|
||||
}
|
||||
low.bitmap[b+k] = newval
|
||||
}
|
||||
for k := end; k < 1024; k++ {
|
||||
newval := bc.bitmap[k] << i
|
||||
if newval == 0 {
|
||||
newval = bc.bitmap[k-1] >> (64 - i)
|
||||
}
|
||||
high.bitmap[k-end] = newval
|
||||
}
|
||||
high.bitmap[b] = bc.bitmap[1023] >> (64 - i)
|
||||
}
|
||||
low.computeCardinality()
|
||||
high.computeCardinality()
|
||||
return []container{low, high}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ package roaring
|
|||
|
||||
import "github.com/tinylib/msgp/msgp"
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -54,7 +54,7 @@ func (z *bitmapContainer) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 2
|
||||
// write "cardinality"
|
||||
|
@ -84,7 +84,7 @@ func (z *bitmapContainer) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 2
|
||||
|
@ -100,7 +100,7 @@ func (z *bitmapContainer) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -149,13 +149,13 @@ func (z *bitmapContainer) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *bitmapContainer) Msgsize() (s int) {
|
||||
s = 1 + 12 + msgp.IntSize + 7 + msgp.ArrayHeaderSize + (len(z.bitmap) * (msgp.Uint64Size))
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -239,7 +239,7 @@ func (z *bitmapContainerShortIterator) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 2
|
||||
// write "ptr"
|
||||
|
@ -291,7 +291,7 @@ func (z *bitmapContainerShortIterator) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 2
|
||||
|
@ -317,7 +317,7 @@ func (z *bitmapContainerShortIterator) MarshalMsg(b []byte) (o []byte, err error
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -402,7 +402,7 @@ func (z *bitmapContainerShortIterator) UnmarshalMsg(bts []byte) (o []byte, err e
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *bitmapContainerShortIterator) Msgsize() (s int) {
|
||||
s = 1 + 4
|
||||
if z.ptr == nil {
|
||||
|
|
|
@ -0,0 +1,161 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"io"
|
||||
)
|
||||
|
||||
// byteInput is the sequential byte source used for reading: it hands out
// chunks of bytes and little-endian integers, and keeps count of how many
// bytes have been consumed. This file provides a slice-backed implementation
// (byteBuffer) and an io.Reader-backed one (byteInputAdapter).
type byteInput interface {
|
||||
// next returns a slice containing the next n bytes from the input,
|
||||
// advancing the input as if the bytes had been returned by Read.
|
||||
next(n int) ([]byte, error)
|
||||
// readUInt32 reads a uint32 stored in little-endian order
|
||||
readUInt32() (uint32, error)
|
||||
// readUInt16 reads a uint16 stored in little-endian order
|
||||
readUInt16() (uint16, error)
|
||||
// getReadBytes returns the number of bytes consumed so far
|
||||
getReadBytes() int64
|
||||
// skipBytes skips exactly n bytes
|
||||
skipBytes(n int) error
|
||||
}
|
||||
|
||||
func newByteInputFromReader(reader io.Reader) byteInput {
|
||||
return &byteInputAdapter{
|
||||
r: reader,
|
||||
readBytes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
func newByteInput(buf []byte) byteInput {
|
||||
return &byteBuffer{
|
||||
buf: buf,
|
||||
off: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// byteBuffer reads sequentially from an in-memory byte slice.
type byteBuffer struct {
	buf []byte // backing data
	off int    // index in buf of the next unread byte
}

// next returns the next n bytes as a sub-slice of the backing data (no copy)
// and advances past them. If fewer than n bytes remain it returns
// io.ErrUnexpectedEOF and consumes nothing.
func (b *byteBuffer) next(n int) ([]byte, error) {
	if remaining := len(b.buf) - b.off; n > remaining {
		return nil, io.ErrUnexpectedEOF
	}

	end := b.off + n
	chunk := b.buf[b.off:end]
	b.off = end

	return chunk, nil
}

// readUInt32 decodes the next four bytes as a little-endian uint32. If fewer
// than four bytes remain it returns io.ErrUnexpectedEOF and consumes nothing.
func (b *byteBuffer) readUInt32() (uint32, error) {
	end := b.off + 4
	if end > len(b.buf) {
		return 0, io.ErrUnexpectedEOF
	}

	value := binary.LittleEndian.Uint32(b.buf[b.off:end])
	b.off = end

	return value, nil
}

// readUInt16 decodes the next two bytes as a little-endian uint16. If fewer
// than two bytes remain it returns io.ErrUnexpectedEOF and consumes nothing.
func (b *byteBuffer) readUInt16() (uint16, error) {
	end := b.off + 2
	if end > len(b.buf) {
		return 0, io.ErrUnexpectedEOF
	}

	value := binary.LittleEndian.Uint16(b.buf[b.off:end])
	b.off = end

	return value, nil
}

// getReadBytes returns how many bytes have been consumed so far.
func (b *byteBuffer) getReadBytes() int64 {
	return int64(b.off)
}

// skipBytes advances past exactly n bytes, or returns io.ErrUnexpectedEOF
// (without moving) when fewer than n bytes remain.
func (b *byteBuffer) skipBytes(n int) error {
	if n > len(b.buf)-b.off {
		return io.ErrUnexpectedEOF
	}

	b.off += n

	return nil
}

// reset points the buffer at a new byte slice and rewinds to its start.
func (b *byteBuffer) reset(buf []byte) {
	*b = byteBuffer{buf: buf}
}
|
||||
|
||||
// byteInputAdapter reads sequentially from an io.Reader and counts the bytes
// it has pulled from it.
type byteInputAdapter struct {
	r         io.Reader
	readBytes int // total bytes read from r, including partial failed reads
}

// next reads exactly n bytes from the underlying reader into a newly
// allocated slice. Bytes obtained before a failure still count toward
// getReadBytes, but no data is returned with an error.
func (b *byteInputAdapter) next(n int) ([]byte, error) {
	chunk := make([]byte, n)
	got, err := io.ReadFull(b.r, chunk)
	b.readBytes += got

	if err != nil {
		return nil, err
	}

	return chunk, nil
}

// readUInt32 reads four bytes and decodes them as a little-endian uint32.
func (b *byteInputAdapter) readUInt32() (uint32, error) {
	raw, err := b.next(4)
	if err == nil {
		return binary.LittleEndian.Uint32(raw), nil
	}

	return 0, err
}

// readUInt16 reads two bytes and decodes them as a little-endian uint16.
func (b *byteInputAdapter) readUInt16() (uint16, error) {
	raw, err := b.next(2)
	if err == nil {
		return binary.LittleEndian.Uint16(raw), nil
	}

	return 0, err
}

// getReadBytes returns how many bytes have been read from the stream so far.
func (b *byteInputAdapter) getReadBytes() int64 {
	return int64(b.readBytes)
}

// skipBytes consumes exactly n bytes by reading and discarding them.
func (b *byteInputAdapter) skipBytes(n int) error {
	if _, err := b.next(n); err != nil {
		return err
	}

	return nil
}

// reset switches the adapter to a new stream and zeroes the byte counter.
func (b *byteInputAdapter) reset(stream io.Reader) {
	b.r, b.readBytes = stream, 0
}
|
|
@ -0,0 +1,11 @@
|
|||
// +build go1.9
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// countLeadingZeros returns the number of zero bits above the most
// significant set bit of x; the result is 64 for x == 0.
func countLeadingZeros(x uint64) int {
	// bits.Len64 is the minimum number of bits needed to represent x, so the
	// leading zeros are whatever is left of the 64.
	return 64 - bits.Len64(x)
}
|
|
@ -0,0 +1,36 @@
|
|||
// +build !go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// countLeadingZeros returns the number of consecutive most significant zero
// bits of i; the result is 64 when i is zero.
func countLeadingZeros(i uint64) int {
	if i == 0 {
		return 64
	}
	// Binary search for the highest set bit: whenever the top `width` bits are
	// all zero, count them and shift them out, then halve the width.
	zeros := 0
	for width := uint(32); width > 0; width >>= 1 {
		if i>>(64-width) == 0 {
			zeros += int(width)
			i <<= width
		}
	}
	return zeros
}
|
|
@ -0,0 +1,16 @@
|
|||
module github.com/RoaringBitmap/roaring
|
||||
|
||||
go 1.12
|
||||
|
||||
require (
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect
|
||||
github.com/golang/snappy v0.0.1 // indirect
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 // indirect
|
||||
github.com/jtolds/gls v4.20.0+incompatible // indirect
|
||||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae
|
||||
github.com/philhofer/fwd v1.0.0 // indirect
|
||||
github.com/stretchr/testify v1.4.0
|
||||
github.com/tinylib/msgp v1.1.0
|
||||
github.com/willf/bitset v1.1.10
|
||||
)
|
|
@ -0,0 +1,30 @@
|
|||
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
|
||||
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
|
||||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
|
||||
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
|
||||
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
|
||||
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY=
|
||||
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg=
|
||||
github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ=
|
||||
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
|
||||
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
|
||||
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
|
||||
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
|
@ -4,12 +4,7 @@ type manyIterable interface {
|
|||
nextMany(hs uint32, buf []uint32) int
|
||||
}
|
||||
|
||||
type manyIterator struct {
|
||||
slice []uint16
|
||||
loc int
|
||||
}
|
||||
|
||||
func (si *manyIterator) nextMany(hs uint32, buf []uint32) int {
|
||||
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
|
||||
n := 0
|
||||
l := si.loc
|
||||
s := si.slice
|
||||
|
|
|
@ -143,8 +143,8 @@ func toBitmapContainer(c container) container {
|
|||
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
|
||||
expectedKeys := -1
|
||||
appendedKeys := 0
|
||||
keys := make([]uint16, 0)
|
||||
containers := make([]container, 0)
|
||||
var keys []uint16
|
||||
var containers []container
|
||||
for appendedKeys != expectedKeys {
|
||||
select {
|
||||
case item := <-resultChan:
|
||||
|
@ -337,7 +337,7 @@ func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
|||
// (if it is set to 0, a default number of workers is chosen)
|
||||
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
var lKey uint16 = MaxUint16
|
||||
var hKey uint16 = 0
|
||||
var hKey uint16
|
||||
|
||||
bitmapsFiltered := bitmaps[:0]
|
||||
for _, b := range bitmaps {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,163 +0,0 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// common to rle32.go and rle16.go
|
||||
|
||||
// rleVerbose controls whether p() prints show up.
|
||||
// The testing package sets this based on
|
||||
// testing.Verbose().
|
||||
var rleVerbose bool
|
||||
|
||||
// p is a shorthand for fmt.Printf with beginning and
|
||||
// trailing newlines. p() makes it easy
|
||||
// to add diagnostic print statements.
|
||||
func p(format string, args ...interface{}) {
|
||||
if rleVerbose {
|
||||
fmt.Printf("\n"+format+"\n", args...)
|
||||
}
|
||||
}
|
||||
|
||||
// MaxUint32 is the largest uint32 value.
|
||||
const MaxUint32 = 4294967295
|
||||
|
||||
// MaxUint16 is the largest 16 bit unsigned int.
|
||||
// This is the largest value an interval16 can store.
|
||||
const MaxUint16 = 65535
|
||||
|
||||
// searchOptions allows us to accelerate runContainer32.search with
|
||||
// prior knowledge of (mostly lower) bounds. This is used by Union
|
||||
// and Intersect.
|
||||
type searchOptions struct {
|
||||
// start here instead of at 0
|
||||
startIndex int64
|
||||
|
||||
// upper bound instead of len(rc.iv);
|
||||
// endxIndex == 0 means ignore the bound and use
|
||||
// endxIndex == n ==len(rc.iv) which is also
|
||||
// naturally the default for search()
|
||||
// when opt = nil.
|
||||
endxIndex int64
|
||||
}
|
||||
|
||||
// And finds the intersection of rc and b.
|
||||
func (rc *runContainer32) And(b *Bitmap) *Bitmap {
|
||||
out := NewBitmap()
|
||||
for _, p := range rc.iv {
|
||||
for i := p.start; i <= p.last; i++ {
|
||||
if b.Contains(i) {
|
||||
out.Add(i)
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Xor returns the exclusive-or of rc and b.
|
||||
func (rc *runContainer32) Xor(b *Bitmap) *Bitmap {
|
||||
out := b.Clone()
|
||||
for _, p := range rc.iv {
|
||||
for v := p.start; v <= p.last; v++ {
|
||||
if out.Contains(v) {
|
||||
out.RemoveRange(uint64(v), uint64(v+1))
|
||||
} else {
|
||||
out.Add(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Or returns the union of rc and b.
|
||||
func (rc *runContainer32) Or(b *Bitmap) *Bitmap {
|
||||
out := b.Clone()
|
||||
for _, p := range rc.iv {
|
||||
for v := p.start; v <= p.last; v++ {
|
||||
out.Add(v)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// trial is used in the randomized testing of runContainers
|
||||
type trial struct {
|
||||
n int
|
||||
percentFill float64
|
||||
ntrial int
|
||||
|
||||
// only in the union test
|
||||
// only subtract test
|
||||
percentDelete float64
|
||||
|
||||
// only in 067 randomized operations
|
||||
// we do this + 1 passes
|
||||
numRandomOpsPass int
|
||||
|
||||
// allow sampling range control
|
||||
// only recent tests respect this.
|
||||
srang *interval16
|
||||
}
|
||||
|
||||
// And finds the intersection of rc and b.
|
||||
func (rc *runContainer16) And(b *Bitmap) *Bitmap {
|
||||
out := NewBitmap()
|
||||
for _, p := range rc.iv {
|
||||
plast := p.last()
|
||||
for i := p.start; i <= plast; i++ {
|
||||
if b.Contains(uint32(i)) {
|
||||
out.Add(uint32(i))
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Xor returns the exclusive-or of rc and b.
|
||||
func (rc *runContainer16) Xor(b *Bitmap) *Bitmap {
|
||||
out := b.Clone()
|
||||
for _, p := range rc.iv {
|
||||
plast := p.last()
|
||||
for v := p.start; v <= plast; v++ {
|
||||
w := uint32(v)
|
||||
if out.Contains(w) {
|
||||
out.RemoveRange(uint64(w), uint64(w+1))
|
||||
} else {
|
||||
out.Add(w)
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Or returns the union of rc and b.
|
||||
func (rc *runContainer16) Or(b *Bitmap) *Bitmap {
|
||||
out := b.Clone()
|
||||
for _, p := range rc.iv {
|
||||
plast := p.last()
|
||||
for v := p.start; v <= plast; v++ {
|
||||
out.Add(uint32(v))
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
//func (rc *runContainer32) and(container) container {
|
||||
// panic("TODO. not yet implemented")
|
||||
//}
|
||||
|
||||
// serializedSizeInBytes returns the number of bytes of memory
|
||||
// required by this runContainer16. This is for the
|
||||
// Roaring format, as specified https://github.com/RoaringBitmap/RoaringFormatSpec/
|
||||
func (rc *runContainer16) serializedSizeInBytes() int {
|
||||
// number of runs in one uint16, then each run
|
||||
// needs two more uint16
|
||||
return 2 + len(rc.iv)*4
|
||||
}
|
||||
|
||||
// serializedSizeInBytes returns the number of bytes of memory
|
||||
// required by this runContainer32.
|
||||
func (rc *runContainer32) serializedSizeInBytes() int {
|
||||
return 4 + len(rc.iv)*8
|
||||
}
|
|
@ -1,695 +0,0 @@
|
|||
package roaring
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
//
|
||||
// container interface methods for runContainer16
|
||||
//
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// compile time verify we meet interface requirements
|
||||
var _ container = &runContainer16{}
|
||||
|
||||
func (rc *runContainer16) clone() container {
|
||||
return newRunContainer16CopyIv(rc.iv)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) minimum() uint16 {
|
||||
return rc.iv[0].start // assume not empty
|
||||
}
|
||||
|
||||
func (rc *runContainer16) maximum() uint16 {
|
||||
return rc.iv[len(rc.iv)-1].last() // assume not empty
|
||||
}
|
||||
|
||||
func (rc *runContainer16) isFull() bool {
|
||||
return (len(rc.iv) == 1) && ((rc.iv[0].start == 0) && (rc.iv[0].last() == MaxUint16))
|
||||
}
|
||||
|
||||
func (rc *runContainer16) and(a container) container {
|
||||
if rc.isFull() {
|
||||
return a.clone()
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.intersect(c)
|
||||
case *arrayContainer:
|
||||
return rc.andArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.andBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andCardinality(a container) int {
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return int(rc.intersectCardinality(c))
|
||||
case *arrayContainer:
|
||||
return rc.andArrayCardinality(c)
|
||||
case *bitmapContainer:
|
||||
return rc.andBitmapContainerCardinality(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
// andBitmapContainer finds the intersection of rc and b.
|
||||
func (rc *runContainer16) andBitmapContainer(bc *bitmapContainer) container {
|
||||
bc2 := newBitmapContainerFromRun(rc)
|
||||
return bc2.andBitmap(bc)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andArrayCardinality(ac *arrayContainer) int {
|
||||
pos := 0
|
||||
answer := 0
|
||||
maxpos := ac.getCardinality()
|
||||
if maxpos == 0 {
|
||||
return 0 // won't happen in actual code
|
||||
}
|
||||
v := ac.content[pos]
|
||||
mainloop:
|
||||
for _, p := range rc.iv {
|
||||
for v < p.start {
|
||||
pos++
|
||||
if pos == maxpos {
|
||||
break mainloop
|
||||
}
|
||||
v = ac.content[pos]
|
||||
}
|
||||
for v <= p.last() {
|
||||
answer++
|
||||
pos++
|
||||
if pos == maxpos {
|
||||
break mainloop
|
||||
}
|
||||
v = ac.content[pos]
|
||||
}
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iand(a container) container {
|
||||
if rc.isFull() {
|
||||
return a.clone()
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.inplaceIntersect(c)
|
||||
case *arrayContainer:
|
||||
return rc.andArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.iandBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) inplaceIntersect(rc2 *runContainer16) container {
|
||||
// TODO: optimize by doing less allocation, possibly?
|
||||
|
||||
// sect will be new
|
||||
sect := rc.intersect(rc2)
|
||||
*rc = *sect
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandBitmapContainer(bc *bitmapContainer) container {
|
||||
isect := rc.andBitmapContainer(bc)
|
||||
*rc = *newRunContainer16FromContainer(isect)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andArray(ac *arrayContainer) container {
|
||||
if len(rc.iv) == 0 {
|
||||
return newArrayContainer()
|
||||
}
|
||||
|
||||
acCardinality := ac.getCardinality()
|
||||
c := newArrayContainerCapacity(acCardinality)
|
||||
|
||||
for rlePos, arrayPos := 0, 0; arrayPos < acCardinality; {
|
||||
iv := rc.iv[rlePos]
|
||||
arrayVal := ac.content[arrayPos]
|
||||
|
||||
for iv.last() < arrayVal {
|
||||
rlePos++
|
||||
if rlePos == len(rc.iv) {
|
||||
return c
|
||||
}
|
||||
iv = rc.iv[rlePos]
|
||||
}
|
||||
|
||||
if iv.start > arrayVal {
|
||||
arrayPos = advanceUntil(ac.content, arrayPos, len(ac.content), iv.start)
|
||||
} else {
|
||||
c.content = append(c.content, arrayVal)
|
||||
arrayPos++
|
||||
}
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andNot(a container) container {
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.andNotArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.andNotBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.andNotRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) fillLeastSignificant16bits(x []uint32, i int, mask uint32) {
|
||||
k := 0
|
||||
var val int64
|
||||
for _, p := range rc.iv {
|
||||
n := p.runlen()
|
||||
for j := int64(0); j < n; j++ {
|
||||
val = int64(p.start) + j
|
||||
x[k+i] = uint32(val) | mask
|
||||
k++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (rc *runContainer16) getShortIterator() shortIterable {
|
||||
return rc.newRunIterator16()
|
||||
}
|
||||
|
||||
func (rc *runContainer16) getManyIterator() manyIterable {
|
||||
return rc.newManyRunIterator16()
|
||||
}
|
||||
|
||||
// add the values in the range [firstOfRange, endx). endx
|
||||
// is still abe to express 2^16 because it is an int not an uint16.
|
||||
func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
|
||||
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange", endx))
|
||||
}
|
||||
addme := newRunContainer16TakeOwnership([]interval16{
|
||||
{
|
||||
start: uint16(firstOfRange),
|
||||
length: uint16(endx - 1 - firstOfRange),
|
||||
},
|
||||
})
|
||||
*rc = *rc.union(addme)
|
||||
return rc
|
||||
}
|
||||
|
||||
// remove the values in the range [firstOfRange,endx)
|
||||
func (rc *runContainer16) iremoveRange(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("request to iremove empty set [%v, %v),"+
|
||||
" nothing to do.", firstOfRange, endx))
|
||||
//return rc
|
||||
}
|
||||
x := newInterval16Range(uint16(firstOfRange), uint16(endx-1))
|
||||
rc.isubtract(x)
|
||||
return rc
|
||||
}
|
||||
|
||||
// not flip the values in the range [firstOfRange,endx)
|
||||
func (rc *runContainer16) not(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
|
||||
}
|
||||
|
||||
return rc.Not(firstOfRange, endx)
|
||||
}
|
||||
|
||||
// Not flips the values in the range [firstOfRange,endx).
|
||||
// This is not inplace. Only the returned value has the flipped bits.
|
||||
//
|
||||
// Currently implemented as (!A intersect B) union (A minus B),
|
||||
// where A is rc, and B is the supplied [firstOfRange, endx) interval.
|
||||
//
|
||||
// TODO(time optimization): convert this to a single pass
|
||||
// algorithm by copying AndNotRunContainer16() and modifying it.
|
||||
// Current routine is correct but
|
||||
// makes 2 more passes through the arrays than should be
|
||||
// strictly necessary. Measure both ways though--this may not matter.
|
||||
//
|
||||
func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
|
||||
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange == %v", endx, firstOfRange))
|
||||
}
|
||||
|
||||
if firstOfRange >= endx {
|
||||
return rc.Clone()
|
||||
}
|
||||
|
||||
a := rc
|
||||
// algo:
|
||||
// (!A intersect B) union (A minus B)
|
||||
|
||||
nota := a.invert()
|
||||
|
||||
bs := []interval16{newInterval16Range(uint16(firstOfRange), uint16(endx-1))}
|
||||
b := newRunContainer16TakeOwnership(bs)
|
||||
|
||||
notAintersectB := nota.intersect(b)
|
||||
|
||||
aMinusB := a.AndNotRunContainer16(b)
|
||||
|
||||
rc2 := notAintersectB.union(aMinusB)
|
||||
return rc2
|
||||
}
|
||||
|
||||
// equals is now logical equals; it does not require the
|
||||
// same underlying container type.
|
||||
func (rc *runContainer16) equals(o container) bool {
|
||||
srb, ok := o.(*runContainer16)
|
||||
|
||||
if !ok {
|
||||
// maybe value instead of pointer
|
||||
val, valok := o.(*runContainer16)
|
||||
if valok {
|
||||
srb = val
|
||||
ok = true
|
||||
}
|
||||
}
|
||||
if ok {
|
||||
// Check if the containers are the same object.
|
||||
if rc == srb {
|
||||
return true
|
||||
}
|
||||
|
||||
if len(srb.iv) != len(rc.iv) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, v := range rc.iv {
|
||||
if v != srb.iv[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// use generic comparison
|
||||
if o.getCardinality() != rc.getCardinality() {
|
||||
return false
|
||||
}
|
||||
rit := rc.getShortIterator()
|
||||
bit := o.getShortIterator()
|
||||
|
||||
//k := 0
|
||||
for rit.hasNext() {
|
||||
if bit.next() != rit.next() {
|
||||
return false
|
||||
}
|
||||
//k++
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iaddReturnMinimized(x uint16) container {
|
||||
rc.Add(x)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iadd(x uint16) (wasNew bool) {
|
||||
return rc.Add(x)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iremoveReturnMinimized(x uint16) container {
|
||||
rc.removeKey(x)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iremove(x uint16) bool {
|
||||
return rc.removeKey(x)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) or(a container) container {
|
||||
if rc.isFull() {
|
||||
return rc.clone()
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.union(c)
|
||||
case *arrayContainer:
|
||||
return rc.orArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.orBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) orCardinality(a container) int {
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return int(rc.unionCardinality(c))
|
||||
case *arrayContainer:
|
||||
return rc.orArrayCardinality(c)
|
||||
case *bitmapContainer:
|
||||
return rc.orBitmapContainerCardinality(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
// orBitmapContainer finds the union of rc and bc.
|
||||
func (rc *runContainer16) orBitmapContainer(bc *bitmapContainer) container {
|
||||
bc2 := newBitmapContainerFromRun(rc)
|
||||
return bc2.iorBitmap(bc)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int {
|
||||
answer := 0
|
||||
for i := range rc.iv {
|
||||
answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
|
||||
}
|
||||
//bc.computeCardinality()
|
||||
return answer
|
||||
}
|
||||
|
||||
func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int {
|
||||
return rc.getCardinality() + bc.getCardinality() - rc.andBitmapContainerCardinality(bc)
|
||||
}
|
||||
|
||||
// orArray finds the union of rc and ac.
|
||||
func (rc *runContainer16) orArray(ac *arrayContainer) container {
|
||||
bc1 := newBitmapContainerFromRun(rc)
|
||||
bc2 := ac.toBitmapContainer()
|
||||
return bc1.orBitmap(bc2)
|
||||
}
|
||||
|
||||
// orArray finds the union of rc and ac.
|
||||
func (rc *runContainer16) orArrayCardinality(ac *arrayContainer) int {
|
||||
return ac.getCardinality() + rc.getCardinality() - rc.andArrayCardinality(ac)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) ior(a container) container {
|
||||
if rc.isFull() {
|
||||
return rc
|
||||
}
|
||||
switch c := a.(type) {
|
||||
case *runContainer16:
|
||||
return rc.inplaceUnion(c)
|
||||
case *arrayContainer:
|
||||
return rc.iorArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.iorBitmapContainer(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
|
||||
p("rc.inplaceUnion with len(rc2.iv)=%v", len(rc2.iv))
|
||||
for _, p := range rc2.iv {
|
||||
last := int64(p.last())
|
||||
for i := int64(p.start); i <= last; i++ {
|
||||
rc.Add(uint16(i))
|
||||
}
|
||||
}
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
|
||||
|
||||
it := bc.getShortIterator()
|
||||
for it.hasNext() {
|
||||
rc.Add(it.next())
|
||||
}
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iorArray(ac *arrayContainer) container {
|
||||
it := ac.getShortIterator()
|
||||
for it.hasNext() {
|
||||
rc.Add(it.next())
|
||||
}
|
||||
return rc
|
||||
}
|
||||
|
||||
// lazyIOR is described (not yet implemented) in
|
||||
// this nice note from @lemire on
|
||||
// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
|
||||
//
|
||||
// Description of lazyOR and lazyIOR from @lemire:
|
||||
//
|
||||
// Lazy functions are optional and can be simply
|
||||
// wrapper around non-lazy functions.
|
||||
//
|
||||
// The idea of "laziness" is as follows. It is
|
||||
// inspired by the concept of lazy evaluation
|
||||
// you might be familiar with (functional programming
|
||||
// and all that). So a roaring bitmap is
|
||||
// such that all its containers are, in some
|
||||
// sense, chosen to use as little memory as
|
||||
// possible. This is nice. Also, all bitsets
|
||||
// are "cardinality aware" so that you can do
|
||||
// fast rank/select queries, or query the
|
||||
// cardinality of the whole bitmap... very fast,
|
||||
// without latency.
|
||||
//
|
||||
// However, imagine that you are aggregating 100
|
||||
// bitmaps together. So you OR the first two, then OR
|
||||
// that with the third one and so forth. Clearly,
|
||||
// intermediate bitmaps don't need to be as
|
||||
// compressed as possible, right? They can be
|
||||
// in a "dirty state". You only need the end
|
||||
// result to be in a nice state... which you
|
||||
// can achieve by calling repairAfterLazy at the end.
|
||||
//
|
||||
// The Java/C code does something special for
|
||||
// the in-place lazy OR runs. The idea is that
|
||||
// instead of taking two run containers and
|
||||
// generating a new one, we actually try to
|
||||
// do the computation in-place through a
|
||||
// technique invented by @gssiyankai (pinging him!).
|
||||
// What you do is you check whether the host
|
||||
// run container has lots of extra capacity.
|
||||
// If it does, you move its data at the end of
|
||||
// the backing array, and then you write
|
||||
// the answer at the beginning. What this
|
||||
// trick does is minimize memory allocations.
|
||||
//
|
||||
func (rc *runContainer16) lazyIOR(a container) container {
|
||||
// not lazy at the moment
|
||||
// TODO: make it lazy
|
||||
return rc.ior(a)
|
||||
|
||||
/*
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.lazyIorArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.lazyIorBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.lazyIorRun16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
*/
|
||||
}
|
||||
|
||||
// lazyOR is described above in lazyIOR.
|
||||
func (rc *runContainer16) lazyOR(a container) container {
|
||||
|
||||
// not lazy at the moment
|
||||
// TODO: make it lazy
|
||||
return rc.or(a)
|
||||
|
||||
/*
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.lazyOrArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.lazyOrBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.lazyOrRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
*/
|
||||
}
|
||||
|
||||
func (rc *runContainer16) intersects(a container) bool {
|
||||
// TODO: optimize by doing inplace/less allocation, possibly?
|
||||
isect := rc.and(a)
|
||||
return isect.getCardinality() > 0
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xor(a container) container {
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.xorArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.xorBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.xorRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNot(a container) container {
|
||||
switch c := a.(type) {
|
||||
case *arrayContainer:
|
||||
return rc.iandNotArray(c)
|
||||
case *bitmapContainer:
|
||||
return rc.iandNotBitmap(c)
|
||||
case *runContainer16:
|
||||
return rc.iandNotRunContainer16(c)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
||||
|
||||
// flip the values in the range [firstOfRange,endx)
|
||||
func (rc *runContainer16) inot(firstOfRange, endx int) container {
|
||||
if firstOfRange >= endx {
|
||||
panic(fmt.Sprintf("invalid %v = endx >= firstOfRange = %v", endx, firstOfRange))
|
||||
}
|
||||
// TODO: minimize copies, do it all inplace; not() makes a copy.
|
||||
rc = rc.Not(firstOfRange, endx)
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) getCardinality() int {
|
||||
return int(rc.cardinality())
|
||||
}
|
||||
|
||||
func (rc *runContainer16) rank(x uint16) int {
|
||||
n := int64(len(rc.iv))
|
||||
xx := int64(x)
|
||||
w, already, _ := rc.search(xx, nil)
|
||||
if w < 0 {
|
||||
return 0
|
||||
}
|
||||
if !already && w == n-1 {
|
||||
return rc.getCardinality()
|
||||
}
|
||||
var rnk int64
|
||||
if !already {
|
||||
for i := int64(0); i <= w; i++ {
|
||||
rnk += rc.iv[i].runlen()
|
||||
}
|
||||
return int(rnk)
|
||||
}
|
||||
for i := int64(0); i < w; i++ {
|
||||
rnk += rc.iv[i].runlen()
|
||||
}
|
||||
rnk += int64(x-rc.iv[w].start) + 1
|
||||
return int(rnk)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) selectInt(x uint16) int {
|
||||
return rc.selectInt16(x)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andNotRunContainer16(b *runContainer16) container {
|
||||
return rc.AndNotRunContainer16(b)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andNotArray(ac *arrayContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
acb := ac.toBitmapContainer()
|
||||
return rcb.andNotBitmap(acb)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) andNotBitmap(bc *bitmapContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
return rcb.andNotBitmap(bc)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) toBitmapContainer() *bitmapContainer {
|
||||
p("run16 toBitmap starting; rc has %v ranges", len(rc.iv))
|
||||
bc := newBitmapContainer()
|
||||
for i := range rc.iv {
|
||||
bc.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
|
||||
}
|
||||
bc.computeCardinality()
|
||||
return bc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNotRunContainer16(x2 *runContainer16) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
x2b := x2.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(x2b)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNotArray(ac *arrayContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
acb := ac.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(acb)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) iandNotBitmap(bc *bitmapContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
rcb.iandNotBitmapSurely(bc)
|
||||
// TODO: check size and optimize the return value
|
||||
// TODO: is inplace modification really required? If not, elide the copy.
|
||||
rc2 := newRunContainer16FromBitmapContainer(rcb)
|
||||
*rc = *rc2
|
||||
return rc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xorRunContainer16(x2 *runContainer16) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
x2b := x2.toBitmapContainer()
|
||||
return rcb.xorBitmap(x2b)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xorArray(ac *arrayContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
acb := ac.toBitmapContainer()
|
||||
return rcb.xorBitmap(acb)
|
||||
}
|
||||
|
||||
func (rc *runContainer16) xorBitmap(bc *bitmapContainer) container {
|
||||
rcb := rc.toBitmapContainer()
|
||||
return rcb.xorBitmap(bc)
|
||||
}
|
||||
|
||||
// convert to bitmap or array *if needed*
|
||||
func (rc *runContainer16) toEfficientContainer() container {
|
||||
|
||||
// runContainer16SerializedSizeInBytes(numRuns)
|
||||
sizeAsRunContainer := rc.getSizeInBytes()
|
||||
sizeAsBitmapContainer := bitmapContainerSizeInBytes()
|
||||
card := int(rc.cardinality())
|
||||
sizeAsArrayContainer := arrayContainerSizeInBytes(card)
|
||||
if sizeAsRunContainer <= minOfInt(sizeAsBitmapContainer, sizeAsArrayContainer) {
|
||||
return rc
|
||||
}
|
||||
if card <= arrayDefaultMaxSize {
|
||||
return rc.toArrayContainer()
|
||||
}
|
||||
bc := newBitmapContainerFromRun(rc)
|
||||
return bc
|
||||
}
|
||||
|
||||
func (rc *runContainer16) toArrayContainer() *arrayContainer {
|
||||
ac := newArrayContainer()
|
||||
for i := range rc.iv {
|
||||
ac.iaddRange(int(rc.iv[i].start), int(rc.iv[i].last())+1)
|
||||
}
|
||||
return ac
|
||||
}
|
||||
|
||||
func newRunContainer16FromContainer(c container) *runContainer16 {
|
||||
|
||||
switch x := c.(type) {
|
||||
case *runContainer16:
|
||||
return x.Clone()
|
||||
case *arrayContainer:
|
||||
return newRunContainer16FromArray(x)
|
||||
case *bitmapContainer:
|
||||
return newRunContainer16FromBitmapContainer(x)
|
||||
}
|
||||
panic("unsupported container type")
|
||||
}
|
|
@ -6,12 +6,12 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Bitmap represents a compressed bitmap where you can add integers.
|
||||
|
@ -52,7 +52,7 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
|
|||
return rb.highlowcontainer.toBytes()
|
||||
}
|
||||
|
||||
// WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized
|
||||
// Deprecated: WriteToMsgpack writes a msgpack2/snappy-streaming compressed serialized
|
||||
// version of this bitmap to stream. The format is not
|
||||
// compatible with the WriteTo() format, and is
|
||||
// experimental: it may produce smaller on disk
|
||||
|
@ -67,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) {
|
|||
// The format is compatible with other RoaringBitmap
|
||||
// implementations (Java, C) and is documented here:
|
||||
// https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
|
||||
return rb.highlowcontainer.readFrom(stream)
|
||||
func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
|
||||
stream := byteInputAdapterPool.Get().(*byteInputAdapter)
|
||||
stream.reset(reader)
|
||||
|
||||
p, err = rb.highlowcontainer.readFrom(stream)
|
||||
byteInputAdapterPool.Put(stream)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// FromBuffer creates a bitmap from its serialized version stored in buffer
|
||||
|
@ -87,10 +93,36 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
|
|||
// You should *not* change the copy-on-write status of the resulting
|
||||
// bitmaps (SetCopyOnWrite).
|
||||
//
|
||||
func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) {
|
||||
return rb.highlowcontainer.fromBuffer(buf)
|
||||
// If buf becomes unavailable, then a bitmap created with
|
||||
// FromBuffer would be effectively broken. Furthermore, any
|
||||
// bitmap derived from this bitmap (e.g., via Or, And) might
|
||||
// also be broken. Thus, before making buf unavailable, you should
|
||||
// call CloneCopyOnWriteContainers on all such bitmaps.
|
||||
//
|
||||
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
|
||||
stream := byteBufferPool.Get().(*byteBuffer)
|
||||
stream.reset(buf)
|
||||
|
||||
p, err = rb.highlowcontainer.readFrom(stream)
|
||||
byteBufferPool.Put(stream)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
var (
|
||||
byteBufferPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &byteBuffer{}
|
||||
},
|
||||
}
|
||||
|
||||
byteInputAdapterPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &byteInputAdapter{}
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
|
||||
func (rb *Bitmap) RunOptimize() {
|
||||
rb.highlowcontainer.runOptimize()
|
||||
|
@ -101,7 +133,7 @@ func (rb *Bitmap) HasRunCompression() bool {
|
|||
return rb.highlowcontainer.hasRunCompression()
|
||||
}
|
||||
|
||||
// ReadFromMsgpack reads a msgpack2/snappy-streaming serialized
|
||||
// Deprecated: ReadFromMsgpack reads a msgpack2/snappy-streaming serialized
|
||||
// version of this bitmap from stream. The format is
|
||||
// expected is that written by the WriteToMsgpack()
|
||||
// call; see additional notes there.
|
||||
|
@ -110,29 +142,15 @@ func (rb *Bitmap) ReadFromMsgpack(stream io.Reader) (int64, error) {
|
|||
}
|
||||
|
||||
// MarshalBinary implements the encoding.BinaryMarshaler interface for the bitmap
|
||||
// (same as ToBytes)
|
||||
func (rb *Bitmap) MarshalBinary() ([]byte, error) {
|
||||
var buf bytes.Buffer
|
||||
writer := bufio.NewWriter(&buf)
|
||||
_, err := rb.WriteTo(writer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = writer.Flush()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf.Bytes(), nil
|
||||
return rb.ToBytes()
|
||||
}
|
||||
|
||||
// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface for the bitmap
|
||||
func (rb *Bitmap) UnmarshalBinary(data []byte) error {
|
||||
var buf bytes.Buffer
|
||||
_, err := buf.Write(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
reader := bufio.NewReader(&buf)
|
||||
_, err = rb.ReadFrom(reader)
|
||||
r := bytes.NewReader(data)
|
||||
_, err := rb.ReadFrom(r)
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -215,10 +233,20 @@ type IntIterable interface {
|
|||
Next() uint32
|
||||
}
|
||||
|
||||
// IntPeekable allows you to look at the next value without advancing and
|
||||
// advance as long as the next value is smaller than minval
|
||||
type IntPeekable interface {
|
||||
IntIterable
|
||||
// PeekNext peeks the next value without advancing the iterator
|
||||
PeekNext() uint32
|
||||
// AdvanceIfNeeded advances as long as the next value is smaller than minval
|
||||
AdvanceIfNeeded(minval uint32)
|
||||
}
|
||||
|
||||
type intIterator struct {
|
||||
pos int
|
||||
hs uint32
|
||||
iter shortIterable
|
||||
iter shortPeekable
|
||||
highlowcontainer *roaringArray
|
||||
}
|
||||
|
||||
|
@ -244,6 +272,30 @@ func (ii *intIterator) Next() uint32 {
|
|||
return x
|
||||
}
|
||||
|
||||
// PeekNext peeks the next value without advancing the iterator
|
||||
func (ii *intIterator) PeekNext() uint32 {
|
||||
return uint32(ii.iter.peekNext()&maxLowBit) | ii.hs
|
||||
}
|
||||
|
||||
// AdvanceIfNeeded advances as long as the next value is smaller than minval
|
||||
func (ii *intIterator) AdvanceIfNeeded(minval uint32) {
|
||||
to := minval >> 16
|
||||
|
||||
for ii.HasNext() && (ii.hs>>16) < to {
|
||||
ii.pos++
|
||||
ii.init()
|
||||
}
|
||||
|
||||
if ii.HasNext() && (ii.hs>>16) == to {
|
||||
ii.iter.advanceIfNeeded(lowbits(minval))
|
||||
|
||||
if !ii.iter.hasNext() {
|
||||
ii.pos++
|
||||
ii.init()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newIntIterator(a *Bitmap) *intIterator {
|
||||
p := new(intIterator)
|
||||
p.pos = 0
|
||||
|
@ -252,6 +304,45 @@ func newIntIterator(a *Bitmap) *intIterator {
|
|||
return p
|
||||
}
|
||||
|
||||
type intReverseIterator struct {
|
||||
pos int
|
||||
hs uint32
|
||||
iter shortIterable
|
||||
highlowcontainer *roaringArray
|
||||
}
|
||||
|
||||
// HasNext returns true if there are more integers to iterate over
|
||||
func (ii *intReverseIterator) HasNext() bool {
|
||||
return ii.pos >= 0
|
||||
}
|
||||
|
||||
func (ii *intReverseIterator) init() {
|
||||
if ii.pos >= 0 {
|
||||
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).getReverseIterator()
|
||||
ii.hs = uint32(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 16
|
||||
} else {
|
||||
ii.iter = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Next returns the next integer
|
||||
func (ii *intReverseIterator) Next() uint32 {
|
||||
x := uint32(ii.iter.next()) | ii.hs
|
||||
if !ii.iter.hasNext() {
|
||||
ii.pos = ii.pos - 1
|
||||
ii.init()
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func newIntReverseIterator(a *Bitmap) *intReverseIterator {
|
||||
p := new(intReverseIterator)
|
||||
p.highlowcontainer = &a.highlowcontainer
|
||||
p.pos = a.highlowcontainer.size() - 1
|
||||
p.init()
|
||||
return p
|
||||
}
|
||||
|
||||
// ManyIntIterable allows you to iterate over the values in a Bitmap
|
||||
type ManyIntIterable interface {
|
||||
// pass in a buffer to fill up with values, returns how many values were returned
|
||||
|
@ -325,12 +416,20 @@ func (rb *Bitmap) String() string {
|
|||
return buffer.String()
|
||||
}
|
||||
|
||||
// Iterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order
|
||||
func (rb *Bitmap) Iterator() IntIterable {
|
||||
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
|
||||
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
|
||||
func (rb *Bitmap) Iterator() IntPeekable {
|
||||
return newIntIterator(rb)
|
||||
}
|
||||
|
||||
// Iterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order
|
||||
// ReverseIterator creates a new IntIterable to iterate over the integers contained in the bitmap, in sorted order;
|
||||
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
|
||||
func (rb *Bitmap) ReverseIterator() IntIterable {
|
||||
return newIntReverseIterator(rb)
|
||||
}
|
||||
|
||||
// ManyIterator creates a new ManyIntIterable to iterate over the integers contained in the bitmap, in sorted order;
|
||||
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
|
||||
func (rb *Bitmap) ManyIterator() ManyIntIterable {
|
||||
return newManyIntIterator(rb)
|
||||
}
|
||||
|
@ -374,6 +473,46 @@ func (rb *Bitmap) Equals(o interface{}) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// AddOffset adds the value 'offset' to each and every value in a bitmap, generating a new bitmap in the process
|
||||
func AddOffset(x *Bitmap, offset uint32) (answer *Bitmap) {
|
||||
containerOffset := highbits(offset)
|
||||
inOffset := lowbits(offset)
|
||||
if inOffset == 0 {
|
||||
answer = x.Clone()
|
||||
for pos := 0; pos < answer.highlowcontainer.size(); pos++ {
|
||||
key := answer.highlowcontainer.getKeyAtIndex(pos)
|
||||
key += containerOffset
|
||||
answer.highlowcontainer.keys[pos] = key
|
||||
}
|
||||
} else {
|
||||
answer = New()
|
||||
for pos := 0; pos < x.highlowcontainer.size(); pos++ {
|
||||
key := x.highlowcontainer.getKeyAtIndex(pos)
|
||||
key += containerOffset
|
||||
c := x.highlowcontainer.getContainerAtIndex(pos)
|
||||
offsetted := c.addOffset(inOffset)
|
||||
if offsetted[0].getCardinality() > 0 {
|
||||
curSize := answer.highlowcontainer.size()
|
||||
lastkey := uint16(0)
|
||||
if curSize > 0 {
|
||||
lastkey = answer.highlowcontainer.getKeyAtIndex(curSize - 1)
|
||||
}
|
||||
if curSize > 0 && lastkey == key {
|
||||
prev := answer.highlowcontainer.getContainerAtIndex(curSize - 1)
|
||||
orrseult := prev.ior(offsetted[0])
|
||||
answer.highlowcontainer.setContainerAtIndex(curSize-1, orrseult)
|
||||
} else {
|
||||
answer.highlowcontainer.appendContainer(key, offsetted[0], false)
|
||||
}
|
||||
}
|
||||
if offsetted[1].getCardinality() > 0 {
|
||||
answer.highlowcontainer.appendContainer(key+1, offsetted[1], false)
|
||||
}
|
||||
}
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
// Add the integer x to the bitmap
|
||||
func (rb *Bitmap) Add(x uint32) {
|
||||
hb := highbits(x)
|
||||
|
@ -794,11 +933,6 @@ main:
|
|||
}
|
||||
}
|
||||
|
||||
/*func (rb *Bitmap) Or(x2 *Bitmap) {
|
||||
results := Or(rb, x2) // Todo: could be computed in-place for reduced memory usage
|
||||
rb.highlowcontainer = results.highlowcontainer
|
||||
}*/
|
||||
|
||||
// AndNot computes the difference between two bitmaps and stores the result in the current bitmap
|
||||
func (rb *Bitmap) AndNot(x2 *Bitmap) {
|
||||
pos1 := 0
|
||||
|
@ -1086,10 +1220,10 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
|
|||
return
|
||||
}
|
||||
|
||||
hbStart := highbits(uint32(rangeStart))
|
||||
lbStart := lowbits(uint32(rangeStart))
|
||||
hbLast := highbits(uint32(rangeEnd - 1))
|
||||
lbLast := lowbits(uint32(rangeEnd - 1))
|
||||
hbStart := uint32(highbits(uint32(rangeStart)))
|
||||
lbStart := uint32(lowbits(uint32(rangeStart)))
|
||||
hbLast := uint32(highbits(uint32(rangeEnd - 1)))
|
||||
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
|
||||
|
||||
var max uint32 = maxLowBit
|
||||
for hb := hbStart; hb <= hbLast; hb++ {
|
||||
|
@ -1102,7 +1236,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
|
|||
containerLast = uint32(lbLast)
|
||||
}
|
||||
|
||||
i := rb.highlowcontainer.getIndex(hb)
|
||||
i := rb.highlowcontainer.getIndex(uint16(hb))
|
||||
|
||||
if i >= 0 {
|
||||
c := rb.highlowcontainer.getWritableContainerAtIndex(i).inot(int(containerStart), int(containerLast)+1)
|
||||
|
@ -1113,7 +1247,7 @@ func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
|
|||
}
|
||||
} else { // *think* the range of ones must never be
|
||||
// empty.
|
||||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast)))
|
||||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1139,24 +1273,24 @@ func (rb *Bitmap) AddRange(rangeStart, rangeEnd uint64) {
|
|||
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
|
||||
|
||||
var max uint32 = maxLowBit
|
||||
for hb := uint16(hbStart); hb <= uint16(hbLast); hb++ {
|
||||
for hb := hbStart; hb <= hbLast; hb++ {
|
||||
containerStart := uint32(0)
|
||||
if hb == uint16(hbStart) {
|
||||
if hb == hbStart {
|
||||
containerStart = lbStart
|
||||
}
|
||||
containerLast := max
|
||||
if hb == uint16(hbLast) {
|
||||
if hb == hbLast {
|
||||
containerLast = lbLast
|
||||
}
|
||||
|
||||
i := rb.highlowcontainer.getIndex(hb)
|
||||
i := rb.highlowcontainer.getIndex(uint16(hb))
|
||||
|
||||
if i >= 0 {
|
||||
c := rb.highlowcontainer.getWritableContainerAtIndex(i).iaddRange(int(containerStart), int(containerLast)+1)
|
||||
rb.highlowcontainer.setContainerAtIndex(i, c)
|
||||
} else { // *think* the range of ones must never be
|
||||
// empty.
|
||||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, rangeOfOnes(int(containerStart), int(containerLast)))
|
||||
rb.highlowcontainer.insertNewKeyValueAt(-i-1, uint16(hb), rangeOfOnes(int(containerStart), int(containerLast)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1243,13 +1377,13 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
|
|||
}
|
||||
|
||||
answer := NewBitmap()
|
||||
hbStart := highbits(uint32(rangeStart))
|
||||
lbStart := lowbits(uint32(rangeStart))
|
||||
hbLast := highbits(uint32(rangeEnd - 1))
|
||||
lbLast := lowbits(uint32(rangeEnd - 1))
|
||||
hbStart := uint32(highbits(uint32(rangeStart)))
|
||||
lbStart := uint32(lowbits(uint32(rangeStart)))
|
||||
hbLast := uint32(highbits(uint32(rangeEnd - 1)))
|
||||
lbLast := uint32(lowbits(uint32(rangeEnd - 1)))
|
||||
|
||||
// copy the containers before the active area
|
||||
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, hbStart)
|
||||
answer.highlowcontainer.appendCopiesUntil(bm.highlowcontainer, uint16(hbStart))
|
||||
|
||||
var max uint32 = maxLowBit
|
||||
for hb := hbStart; hb <= hbLast; hb++ {
|
||||
|
@ -1262,23 +1396,23 @@ func Flip(bm *Bitmap, rangeStart, rangeEnd uint64) *Bitmap {
|
|||
containerLast = uint32(lbLast)
|
||||
}
|
||||
|
||||
i := bm.highlowcontainer.getIndex(hb)
|
||||
j := answer.highlowcontainer.getIndex(hb)
|
||||
i := bm.highlowcontainer.getIndex(uint16(hb))
|
||||
j := answer.highlowcontainer.getIndex(uint16(hb))
|
||||
|
||||
if i >= 0 {
|
||||
c := bm.highlowcontainer.getContainerAtIndex(i).not(int(containerStart), int(containerLast)+1)
|
||||
if c.getCardinality() > 0 {
|
||||
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb, c)
|
||||
answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb), c)
|
||||
}
|
||||
|
||||
} else { // *think* the range of ones must never be
|
||||
// empty.
|
||||
answer.highlowcontainer.insertNewKeyValueAt(-j-1, hb,
|
||||
answer.highlowcontainer.insertNewKeyValueAt(-j-1, uint16(hb),
|
||||
rangeOfOnes(int(containerStart), int(containerLast)))
|
||||
}
|
||||
}
|
||||
// copy the containers after the active area.
|
||||
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, hbLast)
|
||||
answer.highlowcontainer.appendCopiesAfter(bm.highlowcontainer, uint16(hbLast))
|
||||
|
||||
return answer
|
||||
}
|
||||
|
@ -1296,6 +1430,21 @@ func (rb *Bitmap) GetCopyOnWrite() (val bool) {
|
|||
return rb.highlowcontainer.copyOnWrite
|
||||
}
|
||||
|
||||
// CloneCopyOnWriteContainers clones all containers which have
|
||||
// needCopyOnWrite set to true.
|
||||
// This can be used to make sure it is safe to munmap a []byte
|
||||
// that the roaring array may still have a reference to, after
|
||||
// calling FromBuffer.
|
||||
// More generally this function is useful if you call FromBuffer
|
||||
// to construct a bitmap with a backing array buf
|
||||
// and then later discard the buf array. Note that you should call
|
||||
// CloneCopyOnWriteContainers on all bitmaps that were derived
|
||||
// from the 'FromBuffer' bitmap since they map have dependencies
|
||||
// on the buf array as well.
|
||||
func (rb *Bitmap) CloneCopyOnWriteContainers() {
|
||||
rb.highlowcontainer.cloneCopyOnWriteContainers()
|
||||
}
|
||||
|
||||
// FlipInt calls Flip after casting the parameters (convenience method)
|
||||
func FlipInt(bm *Bitmap, rangeStart, rangeEnd int) *Bitmap {
|
||||
return Flip(bm, uint64(rangeStart), uint64(rangeEnd))
|
||||
|
|
|
@ -4,16 +4,16 @@ import (
|
|||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
||||
snappy "github.com/glycerine/go-unsnap-stream"
|
||||
"github.com/tinylib/msgp/msgp"
|
||||
"io"
|
||||
)
|
||||
|
||||
//go:generate msgp -unexported
|
||||
|
||||
type container interface {
|
||||
addOffset(uint16) []container
|
||||
|
||||
clone() container
|
||||
and(container) container
|
||||
andCardinality(container) int
|
||||
|
@ -37,7 +37,8 @@ type container interface {
|
|||
not(start, final int) container // range is [firstOfRange,lastOfRange)
|
||||
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||
xor(r container) container
|
||||
getShortIterator() shortIterable
|
||||
getShortIterator() shortPeekable
|
||||
getReverseIterator() shortIterable
|
||||
getManyIterator() manyIterable
|
||||
contains(i uint16) bool
|
||||
maximum() uint16
|
||||
|
@ -61,7 +62,6 @@ type container interface {
|
|||
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
|
||||
selectInt(x uint16) int // selectInt returns the xth integer in the container
|
||||
serializedSizeInBytes() int
|
||||
readFrom(io.Reader) (int, error)
|
||||
writeTo(io.Writer) (int, error)
|
||||
|
||||
numberOfRuns() int
|
||||
|
@ -280,6 +280,18 @@ func (ra *roaringArray) clone() *roaringArray {
|
|||
return &sa
|
||||
}
|
||||
|
||||
// clone all containers which have needCopyOnWrite set to true
|
||||
// This can be used to make sure it is safe to munmap a []byte
|
||||
// that the roaring array may still have a reference to.
|
||||
func (ra *roaringArray) cloneCopyOnWriteContainers() {
|
||||
for i, needCopyOnWrite := range ra.needCopyOnWrite {
|
||||
if needCopyOnWrite {
|
||||
ra.containers[i] = ra.containers[i].clone()
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unused function:
|
||||
//func (ra *roaringArray) containsKey(x uint16) bool {
|
||||
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
|
||||
|
@ -456,8 +468,7 @@ func (ra *roaringArray) serializedSizeInBytes() uint64 {
|
|||
//
|
||||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
//
|
||||
func (ra *roaringArray) toBytes() ([]byte, error) {
|
||||
stream := &bytes.Buffer{}
|
||||
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
|
||||
hasRun := ra.hasRunCompression()
|
||||
isRunSizeInBytes := 0
|
||||
cookieSize := 8
|
||||
|
@ -522,79 +533,77 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
|
|||
}
|
||||
}
|
||||
|
||||
_, err := stream.Write(buf[:nw])
|
||||
written, err := w.Write(buf[:nw])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return n, err
|
||||
}
|
||||
for i, c := range ra.containers {
|
||||
_ = i
|
||||
_, err := c.writeTo(stream)
|
||||
n += int64(written)
|
||||
|
||||
for _, c := range ra.containers {
|
||||
written, err := c.writeTo(w)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return n, err
|
||||
}
|
||||
n += int64(written)
|
||||
}
|
||||
return stream.Bytes(), nil
|
||||
return n, nil
|
||||
}
|
||||
|
||||
//
|
||||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
//
|
||||
func (ra *roaringArray) writeTo(out io.Writer) (int64, error) {
|
||||
by, err := ra.toBytes()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
n, err := out.Write(by)
|
||||
if err == nil && n < len(by) {
|
||||
err = io.ErrShortWrite
|
||||
}
|
||||
return int64(n), err
|
||||
func (ra *roaringArray) toBytes() ([]byte, error) {
|
||||
var buf bytes.Buffer
|
||||
_, err := ra.writeTo(&buf)
|
||||
return buf.Bytes(), err
|
||||
}
|
||||
|
||||
func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
|
||||
pos := 0
|
||||
if len(buf) < 8 {
|
||||
return 0, fmt.Errorf("buffer too small, expecting at least 8 bytes, was %d", len(buf))
|
||||
func (ra *roaringArray) readFrom(stream byteInput) (int64, error) {
|
||||
cookie, err := stream.readUInt32()
|
||||
|
||||
if err != nil {
|
||||
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
|
||||
}
|
||||
|
||||
cookie := binary.LittleEndian.Uint32(buf)
|
||||
pos += 4
|
||||
var size uint32 // number of containers
|
||||
haveRunContainers := false
|
||||
var size uint32
|
||||
var isRunBitmap []byte
|
||||
|
||||
// cookie header
|
||||
if cookie&0x0000FFFF == serialCookie {
|
||||
haveRunContainers = true
|
||||
size = uint32(uint16(cookie>>16) + 1) // number of containers
|
||||
|
||||
size = uint32(uint16(cookie>>16) + 1)
|
||||
// create is-run-container bitmap
|
||||
isRunBitmapSize := (int(size) + 7) / 8
|
||||
if pos+isRunBitmapSize > len(buf) {
|
||||
return 0, fmt.Errorf("malformed bitmap, is-run bitmap overruns buffer at %d", pos+isRunBitmapSize)
|
||||
isRunBitmap, err = stream.next(isRunBitmapSize)
|
||||
|
||||
if err != nil {
|
||||
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
|
||||
}
|
||||
|
||||
isRunBitmap = buf[pos : pos+isRunBitmapSize]
|
||||
pos += isRunBitmapSize
|
||||
} else if cookie == serialCookieNoRunContainer {
|
||||
size = binary.LittleEndian.Uint32(buf[pos:])
|
||||
pos += 4
|
||||
} else {
|
||||
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
||||
}
|
||||
if size > (1 << 16) {
|
||||
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
|
||||
}
|
||||
// descriptive header
|
||||
// keycard - is {key, cardinality} tuple slice
|
||||
if pos+2*2*int(size) > len(buf) {
|
||||
return 0, fmt.Errorf("malfomred bitmap, key-cardinality slice overruns buffer at %d", pos+2*2*int(size))
|
||||
}
|
||||
keycard := byteSliceAsUint16Slice(buf[pos : pos+2*2*int(size)])
|
||||
pos += 2 * 2 * int(size)
|
||||
size, err = stream.readUInt32()
|
||||
|
||||
if !haveRunContainers || size >= noOffsetThreshold {
|
||||
pos += 4 * int(size)
|
||||
if err != nil {
|
||||
return stream.getReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
|
||||
}
|
||||
} else {
|
||||
return stream.getReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
||||
}
|
||||
|
||||
if size > (1 << 16) {
|
||||
return stream.getReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
|
||||
}
|
||||
|
||||
// descriptive header
|
||||
buf, err := stream.next(2 * 2 * int(size))
|
||||
|
||||
if err != nil {
|
||||
return stream.getReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
|
||||
}
|
||||
|
||||
keycard := byteSliceAsUint16Slice(buf)
|
||||
|
||||
if isRunBitmap == nil || size >= noOffsetThreshold {
|
||||
if err := stream.skipBytes(int(size) * 4); err != nil {
|
||||
return stream.getReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate slices upfront as number of containers is known
|
||||
|
@ -603,11 +612,13 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
|
|||
} else {
|
||||
ra.containers = make([]container, size)
|
||||
}
|
||||
|
||||
if cap(ra.keys) >= int(size) {
|
||||
ra.keys = ra.keys[:size]
|
||||
} else {
|
||||
ra.keys = make([]uint16, size)
|
||||
}
|
||||
|
||||
if cap(ra.needCopyOnWrite) >= int(size) {
|
||||
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
|
||||
} else {
|
||||
|
@ -615,129 +626,62 @@ func (ra *roaringArray) fromBuffer(buf []byte) (int64, error) {
|
|||
}
|
||||
|
||||
for i := uint32(0); i < size; i++ {
|
||||
key := uint16(keycard[2*i])
|
||||
key := keycard[2*i]
|
||||
card := int(keycard[2*i+1]) + 1
|
||||
ra.keys[i] = key
|
||||
ra.needCopyOnWrite[i] = true
|
||||
|
||||
if haveRunContainers && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
|
||||
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
|
||||
// run container
|
||||
nr := binary.LittleEndian.Uint16(buf[pos:])
|
||||
pos += 2
|
||||
if pos+int(nr)*4 > len(buf) {
|
||||
return 0, fmt.Errorf("malformed bitmap, a run container overruns buffer at %d:%d", pos, pos+int(nr)*4)
|
||||
nr, err := stream.readUInt16()
|
||||
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
|
||||
}
|
||||
|
||||
buf, err := stream.next(int(nr) * 4)
|
||||
|
||||
if err != nil {
|
||||
return stream.getReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
|
||||
}
|
||||
|
||||
nb := runContainer16{
|
||||
iv: byteSliceAsInterval16Slice(buf[pos : pos+int(nr)*4]),
|
||||
iv: byteSliceAsInterval16Slice(buf),
|
||||
card: int64(card),
|
||||
}
|
||||
pos += int(nr) * 4
|
||||
|
||||
ra.containers[i] = &nb
|
||||
} else if card > arrayDefaultMaxSize {
|
||||
// bitmap container
|
||||
buf, err := stream.next(arrayDefaultMaxSize * 2)
|
||||
|
||||
if err != nil {
|
||||
return stream.getReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
|
||||
}
|
||||
|
||||
nb := bitmapContainer{
|
||||
cardinality: card,
|
||||
bitmap: byteSliceAsUint64Slice(buf[pos : pos+arrayDefaultMaxSize*2]),
|
||||
bitmap: byteSliceAsUint64Slice(buf),
|
||||
}
|
||||
pos += arrayDefaultMaxSize * 2
|
||||
|
||||
ra.containers[i] = &nb
|
||||
} else {
|
||||
// array container
|
||||
nb := arrayContainer{
|
||||
byteSliceAsUint16Slice(buf[pos : pos+card*2]),
|
||||
buf, err := stream.next(card * 2)
|
||||
|
||||
if err != nil {
|
||||
return stream.getReadBytes(), fmt.Errorf("failed to read array container: %s", err)
|
||||
}
|
||||
pos += card * 2
|
||||
|
||||
nb := arrayContainer{
|
||||
byteSliceAsUint16Slice(buf),
|
||||
}
|
||||
|
||||
ra.containers[i] = &nb
|
||||
}
|
||||
}
|
||||
|
||||
return int64(pos), nil
|
||||
}
|
||||
|
||||
func (ra *roaringArray) readFrom(stream io.Reader) (int64, error) {
|
||||
pos := 0
|
||||
var cookie uint32
|
||||
err := binary.Read(stream, binary.LittleEndian, &cookie)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
|
||||
}
|
||||
pos += 4
|
||||
var size uint32
|
||||
haveRunContainers := false
|
||||
var isRun *bitmapContainer
|
||||
if cookie&0x0000FFFF == serialCookie {
|
||||
haveRunContainers = true
|
||||
size = uint32(uint16(cookie>>16) + 1)
|
||||
bytesToRead := (int(size) + 7) / 8
|
||||
numwords := (bytesToRead + 7) / 8
|
||||
by := make([]byte, bytesToRead, numwords*8)
|
||||
nr, err := io.ReadFull(stream, by)
|
||||
if err != nil {
|
||||
return 8 + int64(nr), fmt.Errorf("error in readFrom: could not read the "+
|
||||
"runContainer bit flags of length %v bytes: %v", bytesToRead, err)
|
||||
}
|
||||
pos += bytesToRead
|
||||
by = by[:cap(by)]
|
||||
isRun = newBitmapContainer()
|
||||
for i := 0; i < numwords; i++ {
|
||||
isRun.bitmap[i] = binary.LittleEndian.Uint64(by)
|
||||
by = by[8:]
|
||||
}
|
||||
} else if cookie == serialCookieNoRunContainer {
|
||||
err = binary.Read(stream, binary.LittleEndian, &size)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error in roaringArray.readFrom: when reading size, got: %s", err)
|
||||
}
|
||||
pos += 4
|
||||
} else {
|
||||
return 0, fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
||||
}
|
||||
if size > (1 << 16) {
|
||||
return 0, fmt.Errorf("It is logically impossible to have more than (1<<16) containers.")
|
||||
}
|
||||
// descriptive header
|
||||
keycard := make([]uint16, 2*size, 2*size)
|
||||
err = binary.Read(stream, binary.LittleEndian, keycard)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
pos += 2 * 2 * int(size)
|
||||
// offset header
|
||||
if !haveRunContainers || size >= noOffsetThreshold {
|
||||
io.CopyN(ioutil.Discard, stream, 4*int64(size)) // we never skip ahead so this data can be ignored
|
||||
pos += 4 * int(size)
|
||||
}
|
||||
for i := uint32(0); i < size; i++ {
|
||||
key := int(keycard[2*i])
|
||||
card := int(keycard[2*i+1]) + 1
|
||||
if haveRunContainers && isRun.contains(uint16(i)) {
|
||||
nb := newRunContainer16()
|
||||
nr, err := nb.readFrom(stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
pos += nr
|
||||
ra.appendContainer(uint16(key), nb, false)
|
||||
} else if card > arrayDefaultMaxSize {
|
||||
nb := newBitmapContainer()
|
||||
nr, err := nb.readFrom(stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
nb.cardinality = card
|
||||
pos += nr
|
||||
ra.appendContainer(keycard[2*i], nb, false)
|
||||
} else {
|
||||
nb := newArrayContainerSize(card)
|
||||
nr, err := nb.readFrom(stream)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
pos += nr
|
||||
ra.appendContainer(keycard[2*i], nb, false)
|
||||
}
|
||||
}
|
||||
return int64(pos), nil
|
||||
return stream.getReadBytes(), nil
|
||||
}
|
||||
|
||||
func (ra *roaringArray) hasRunCompression() bool {
|
||||
|
|
|
@ -8,7 +8,7 @@ import (
|
|||
"github.com/tinylib/msgp/msgp"
|
||||
)
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -48,7 +48,7 @@ func (z *containerSerz) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 2
|
||||
// write "t"
|
||||
|
@ -72,7 +72,7 @@ func (z *containerSerz) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 2
|
||||
|
@ -88,7 +88,7 @@ func (z *containerSerz) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -129,13 +129,13 @@ func (z *containerSerz) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *containerSerz) Msgsize() (s int) {
|
||||
s = 1 + 2 + msgp.Uint8Size + 2 + z.r.Msgsize()
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
{
|
||||
var zajw uint8
|
||||
|
@ -148,7 +148,7 @@ func (z *contype) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
err = en.WriteUint8(uint8(z))
|
||||
if err != nil {
|
||||
|
@ -157,14 +157,14 @@ func (z contype) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z contype) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
o = msgp.AppendUint8(o, uint8(z))
|
||||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
{
|
||||
var zwht uint8
|
||||
|
@ -178,13 +178,13 @@ func (z *contype) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z contype) Msgsize() (s int) {
|
||||
s = msgp.Uint8Size
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -295,7 +295,7 @@ func (z *roaringArray) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 4
|
||||
// write "keys"
|
||||
|
@ -370,7 +370,7 @@ func (z *roaringArray) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 4
|
||||
|
@ -407,7 +407,7 @@ func (z *roaringArray) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -519,7 +519,7 @@ func (z *roaringArray) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *roaringArray) Msgsize() (s int) {
|
||||
s = 1 + 5 + msgp.ArrayHeaderSize + (len(z.keys) * (msgp.Uint16Size)) + 16 + msgp.ArrayHeaderSize + (len(z.needCopyOnWrite) * (msgp.BoolSize)) + 12 + msgp.BoolSize + 8 + msgp.ArrayHeaderSize
|
||||
for zxhx := range z.conserz {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -6,7 +6,7 @@ package roaring
|
|||
|
||||
import "github.com/tinylib/msgp/msgp"
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -169,7 +169,7 @@ func (z *addHelper16) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 5
|
||||
// write "runstart"
|
||||
|
@ -284,7 +284,7 @@ func (z *addHelper16) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 5
|
||||
|
@ -334,7 +334,7 @@ func (z *addHelper16) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -498,7 +498,7 @@ func (z *addHelper16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *addHelper16) Msgsize() (s int) {
|
||||
s = 1 + 9 + msgp.Uint16Size + 7 + msgp.Uint16Size + 14 + msgp.Uint16Size + 2 + msgp.ArrayHeaderSize + (len(z.m) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 3
|
||||
if z.rc == nil {
|
||||
|
@ -509,7 +509,7 @@ func (z *addHelper16) Msgsize() (s int) {
|
|||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -546,7 +546,7 @@ func (z *interval16) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z interval16) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 2
|
||||
// write "start"
|
||||
|
@ -570,7 +570,7 @@ func (z interval16) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z interval16) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 2
|
||||
|
@ -583,7 +583,7 @@ func (z interval16) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -621,13 +621,13 @@ func (z *interval16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z interval16) Msgsize() (s int) {
|
||||
s = 1 + 6 + msgp.Uint16Size + 5 + msgp.Uint16Size
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -701,7 +701,7 @@ func (z *runContainer16) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 2
|
||||
// write "iv"
|
||||
|
@ -746,7 +746,7 @@ func (z *runContainer16) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 2
|
||||
|
@ -768,7 +768,7 @@ func (z *runContainer16) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -843,13 +843,13 @@ func (z *runContainer16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *runContainer16) Msgsize() (s int) {
|
||||
s = 1 + 3 + msgp.ArrayHeaderSize + (len(z.iv) * (12 + msgp.Uint16Size + msgp.Uint16Size)) + 5 + msgp.Int64Size
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -891,11 +891,6 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "curSeq":
|
||||
z.curSeq, err = dc.ReadInt64()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
|
@ -906,11 +901,11 @@ func (z *runIterator16) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 4
|
||||
// map header, size 3
|
||||
// write "rc"
|
||||
err = en.Append(0x84, 0xa2, 0x72, 0x63)
|
||||
err = en.Append(0x83, 0xa2, 0x72, 0x63)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -943,24 +938,15 @@ func (z *runIterator16) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
if err != nil {
|
||||
return
|
||||
}
|
||||
// write "curSeq"
|
||||
err = en.Append(0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = en.WriteInt64(z.curSeq)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 4
|
||||
// map header, size 3
|
||||
// string "rc"
|
||||
o = append(o, 0x84, 0xa2, 0x72, 0x63)
|
||||
o = append(o, 0x83, 0xa2, 0x72, 0x63)
|
||||
if z.rc == nil {
|
||||
o = msgp.AppendNil(o)
|
||||
} else {
|
||||
|
@ -975,13 +961,10 @@ func (z *runIterator16) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
// string "curPosInIndex"
|
||||
o = append(o, 0xad, 0x63, 0x75, 0x72, 0x50, 0x6f, 0x73, 0x49, 0x6e, 0x49, 0x6e, 0x64, 0x65, 0x78)
|
||||
o = msgp.AppendUint16(o, z.curPosInIndex)
|
||||
// string "curSeq"
|
||||
o = append(o, 0xa6, 0x63, 0x75, 0x72, 0x53, 0x65, 0x71)
|
||||
o = msgp.AppendInt64(o, z.curSeq)
|
||||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var field []byte
|
||||
_ = field
|
||||
|
@ -1023,11 +1006,6 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
if err != nil {
|
||||
return
|
||||
}
|
||||
case "curSeq":
|
||||
z.curSeq, bts, err = msgp.ReadInt64Bytes(bts)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
|
@ -1039,7 +1017,7 @@ func (z *runIterator16) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z *runIterator16) Msgsize() (s int) {
|
||||
s = 1 + 3
|
||||
if z.rc == nil {
|
||||
|
@ -1047,11 +1025,11 @@ func (z *runIterator16) Msgsize() (s int) {
|
|||
} else {
|
||||
s += z.rc.Msgsize()
|
||||
}
|
||||
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size + 7 + msgp.Int64Size
|
||||
s += 9 + msgp.Int64Size + 14 + msgp.Uint16Size
|
||||
return
|
||||
}
|
||||
|
||||
// DecodeMsg implements msgp.Decodable
|
||||
// Deprecated: DecodeMsg implements msgp.Decodable
|
||||
func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
var zjpj uint32
|
||||
zjpj, err = dc.ReadArrayHeader()
|
||||
|
@ -1072,7 +1050,7 @@ func (z *uint16Slice) DecodeMsg(dc *msgp.Reader) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
// Deprecated: EncodeMsg implements msgp.Encodable
|
||||
func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
err = en.WriteArrayHeader(uint32(len(z)))
|
||||
if err != nil {
|
||||
|
@ -1087,7 +1065,7 @@ func (z uint16Slice) EncodeMsg(en *msgp.Writer) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
// Deprecated: MarshalMsg implements msgp.Marshaler
|
||||
func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
o = msgp.AppendArrayHeader(o, uint32(len(z)))
|
||||
|
@ -1097,7 +1075,7 @@ func (z uint16Slice) MarshalMsg(b []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// UnmarshalMsg implements msgp.Unmarshaler
|
||||
// Deprecated: UnmarshalMsg implements msgp.Unmarshaler
|
||||
func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
var zgmo uint32
|
||||
zgmo, bts, err = msgp.ReadArrayHeaderBytes(bts)
|
||||
|
@ -1119,7 +1097,7 @@ func (z *uint16Slice) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
// Deprecated: Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z uint16Slice) Msgsize() (s int) {
|
||||
s = msgp.ArrayHeaderSize + (len(z) * (msgp.Uint16Size))
|
||||
return
|
|
@ -2,8 +2,6 @@ package roaring
|
|||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/tinylib/msgp/msgp"
|
||||
|
@ -22,14 +20,6 @@ func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
|
|||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func (b *runContainer32) writeToMsgpack(stream io.Writer) (int, error) {
|
||||
bts, err := b.MarshalMsg(nil)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return stream.Write(bts)
|
||||
}
|
||||
|
||||
func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
|
||||
bts, err := b.MarshalMsg(nil)
|
||||
if err != nil {
|
||||
|
@ -38,46 +28,7 @@ func (b *runContainer16) writeToMsgpack(stream io.Writer) (int, error) {
|
|||
return stream.Write(bts)
|
||||
}
|
||||
|
||||
func (b *runContainer32) readFromMsgpack(stream io.Reader) (int, error) {
|
||||
err := msgp.Decode(stream, b)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
func (b *runContainer16) readFromMsgpack(stream io.Reader) (int, error) {
|
||||
err := msgp.Decode(stream, b)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var errCorruptedStream = errors.New("insufficient/odd number of stored bytes, corrupted stream detected")
|
||||
|
||||
func (b *runContainer16) readFrom(stream io.Reader) (int, error) {
|
||||
b.iv = b.iv[:0]
|
||||
b.card = 0
|
||||
var numRuns uint16
|
||||
err := binary.Read(stream, binary.LittleEndian, &numRuns)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
nr := int(numRuns)
|
||||
encRun := make([]uint16, 2*nr)
|
||||
by := make([]byte, 4*nr)
|
||||
err = binary.Read(stream, binary.LittleEndian, &by)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
for i := range encRun {
|
||||
if len(by) < 2 {
|
||||
return 0, errCorruptedStream
|
||||
}
|
||||
encRun[i] = binary.LittleEndian.Uint16(by)
|
||||
by = by[2:]
|
||||
}
|
||||
for i := 0; i < nr; i++ {
|
||||
if i > 0 && b.iv[i-1].last() >= encRun[i*2] {
|
||||
return 0, fmt.Errorf("error: stored runContainer had runs that were not in sorted order!! (b.iv[i-1=%v].last = %v >= encRun[i=%v] = %v)", i-1, b.iv[i-1].last(), i, encRun[i*2])
|
||||
}
|
||||
b.iv = append(b.iv, interval16{start: encRun[i*2], length: encRun[i*2+1]})
|
||||
b.card += int64(encRun[i*2+1]) + 1
|
||||
}
|
||||
return 0, err
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ package roaring
|
|||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
|
@ -26,6 +27,10 @@ func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
|
|||
}
|
||||
|
||||
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||
if b.cardinality <= arrayDefaultMaxSize {
|
||||
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
|
||||
}
|
||||
|
||||
// Write set
|
||||
buf := make([]byte, 8*len(b.bitmap))
|
||||
for i, v := range b.bitmap {
|
||||
|
@ -69,6 +74,16 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
|
|||
return by
|
||||
}
|
||||
|
||||
func uint16SliceAsByteSlice(slice []uint16) []byte {
|
||||
by := make([]byte, len(slice)*2)
|
||||
|
||||
for i, v := range slice {
|
||||
binary.LittleEndian.PutUint16(by[i*2:], v)
|
||||
}
|
||||
|
||||
return by
|
||||
}
|
||||
|
||||
func byteSliceAsUint16Slice(slice []byte) []uint16 {
|
||||
if len(slice)%2 != 0 {
|
||||
panic("Slice size should be divisible by 2")
|
||||
|
|
|
@ -3,8 +3,10 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
|
@ -14,26 +16,13 @@ func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
|
|||
}
|
||||
|
||||
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||
if bc.cardinality <= arrayDefaultMaxSize {
|
||||
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
|
||||
}
|
||||
buf := uint64SliceAsByteSlice(bc.bitmap)
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
// readFrom reads an arrayContainer from stream.
|
||||
// PRE-REQUISITE: you must size the arrayContainer correctly (allocate b.content)
|
||||
// *before* you call readFrom. We can't guess the size in the stream
|
||||
// by this point.
|
||||
func (ac *arrayContainer) readFrom(stream io.Reader) (int, error) {
|
||||
buf := uint16SliceAsByteSlice(ac.content)
|
||||
return io.ReadFull(stream, buf)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) readFrom(stream io.Reader) (int, error) {
|
||||
buf := uint64SliceAsByteSlice(bc.bitmap)
|
||||
n, err := io.ReadFull(stream, buf)
|
||||
bc.computeCardinality()
|
||||
return n, err
|
||||
}
|
||||
|
||||
func uint64SliceAsByteSlice(slice []uint64) []byte {
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
|
@ -42,8 +31,12 @@ func uint64SliceAsByteSlice(slice []uint64) []byte {
|
|||
header.Len *= 8
|
||||
header.Cap *= 8
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||
runtime.KeepAlive(&slice)
|
||||
|
||||
// return it
|
||||
return *(*[]byte)(unsafe.Pointer(&header))
|
||||
return result
|
||||
}
|
||||
|
||||
func uint16SliceAsByteSlice(slice []uint16) []byte {
|
||||
|
@ -54,8 +47,12 @@ func uint16SliceAsByteSlice(slice []uint16) []byte {
|
|||
header.Len *= 2
|
||||
header.Cap *= 2
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||
runtime.KeepAlive(&slice)
|
||||
|
||||
// return it
|
||||
return *(*[]byte)(unsafe.Pointer(&header))
|
||||
return result
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||
|
@ -64,50 +61,74 @@ func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
|||
|
||||
// Deserialization code follows
|
||||
|
||||
func byteSliceAsUint16Slice(slice []byte) []uint16 {
|
||||
////
|
||||
// These methods (byteSliceAsUint16Slice,...) do not make copies,
|
||||
// they are pointer-based (unsafe). The caller is responsible to
|
||||
// ensure that the input slice does not get garbage collected, deleted
|
||||
// or modified while you hold the returned slice.
|
||||
////
|
||||
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
|
||||
if len(slice)%2 != 0 {
|
||||
panic("Slice size should be divisible by 2")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len /= 2
|
||||
header.Cap /= 2
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 2
|
||||
rHeader.Cap = bHeader.Cap / 2
|
||||
|
||||
// return it
|
||||
return *(*[]uint16)(unsafe.Pointer(&header))
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsUint64Slice(slice []byte) []uint64 {
|
||||
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
|
||||
if len(slice)%8 != 0 {
|
||||
panic("Slice size should be divisible by 8")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len /= 8
|
||||
header.Cap /= 8
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 8
|
||||
rHeader.Cap = bHeader.Cap / 8
|
||||
|
||||
// return it
|
||||
return *(*[]uint64)(unsafe.Pointer(&header))
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsInterval16Slice(slice []byte) []interval16 {
|
||||
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
|
||||
if len(slice)%4 != 0 {
|
||||
panic("Slice size should be divisible by 4")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len /= 4
|
||||
header.Cap /= 4
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 4
|
||||
rHeader.Cap = bHeader.Cap / 4
|
||||
|
||||
// return it
|
||||
return *(*[]interval16)(unsafe.Pointer(&header))
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
|
|
@ -5,6 +5,12 @@ type shortIterable interface {
|
|||
next() uint16
|
||||
}
|
||||
|
||||
type shortPeekable interface {
|
||||
shortIterable
|
||||
peekNext() uint16
|
||||
advanceIfNeeded(minval uint16)
|
||||
}
|
||||
|
||||
type shortIterator struct {
|
||||
slice []uint16
|
||||
loc int
|
||||
|
@ -19,3 +25,28 @@ func (si *shortIterator) next() uint16 {
|
|||
si.loc++
|
||||
return a
|
||||
}
|
||||
|
||||
func (si *shortIterator) peekNext() uint16 {
|
||||
return si.slice[si.loc]
|
||||
}
|
||||
|
||||
func (si *shortIterator) advanceIfNeeded(minval uint16) {
|
||||
if si.hasNext() && si.peekNext() < minval {
|
||||
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
|
||||
}
|
||||
}
|
||||
|
||||
type reverseIterator struct {
|
||||
slice []uint16
|
||||
loc int
|
||||
}
|
||||
|
||||
func (si *reverseIterator) hasNext() bool {
|
||||
return si.loc >= 0
|
||||
}
|
||||
|
||||
func (si *reverseIterator) next() uint16 {
|
||||
a := si.slice[si.loc]
|
||||
si.loc--
|
||||
return a
|
||||
}
|
||||
|
|
|
@ -14,6 +14,17 @@ const (
|
|||
serialCookie = 12347 // runs, arrays, and bitmaps
|
||||
noOffsetThreshold = 4
|
||||
|
||||
// MaxUint32 is the largest uint32 value.
|
||||
MaxUint32 = 4294967295
|
||||
|
||||
// MaxRange is one more than the maximum allowed bitmap bit index. For use as an upper
|
||||
// bound for ranges.
|
||||
MaxRange uint64 = MaxUint32 + 1
|
||||
|
||||
// MaxUint16 is the largest 16 bit unsigned int.
|
||||
// This is the largest value an interval16 can store.
|
||||
MaxUint16 = 65535
|
||||
|
||||
// Compute wordSizeInBytes, the size of a word in bytes.
|
||||
_m = ^uint64(0)
|
||||
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
|
||||
|
@ -114,7 +125,6 @@ func flipBitmapRange(bitmap []uint64, start int, end int) {
|
|||
endword := (end - 1) / 64
|
||||
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64))
|
||||
for i := firstword; i < endword; i++ {
|
||||
//p("flipBitmapRange on i=%v", i)
|
||||
bitmap[i] = ^bitmap[i]
|
||||
}
|
||||
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64)
|
||||
|
@ -292,24 +302,3 @@ func minOfUint16(a, b uint16) uint16 {
|
|||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func maxInt(a, b int) int {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func maxUint16(a, b uint16) uint16 {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func minUint16(a, b uint16) uint16 {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
|
|
@ -3,9 +3,9 @@ sudo: false
|
|||
language: go
|
||||
|
||||
go:
|
||||
- "1.9.x"
|
||||
- "1.10.x"
|
||||
- "1.11.x"
|
||||
- "1.12.x"
|
||||
|
||||
script:
|
||||
- go get golang.org/x/tools/cmd/cover
|
||||
|
@ -15,7 +15,12 @@ script:
|
|||
- gvt restore
|
||||
- go test -race -v $(go list ./... | grep -v vendor/)
|
||||
- go vet $(go list ./... | grep -v vendor/)
|
||||
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/)
|
||||
- go test ./test -v -indexType scorch
|
||||
- if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then
|
||||
echo "errcheck skipped for go version" $TRAVIS_GO_VERSION;
|
||||
else
|
||||
errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
|
||||
fi
|
||||
- docs/project-code-coverage.sh
|
||||
- docs/build_children.sh
|
||||
|
||||
|
|
|
@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
|
|||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
func (t *TextField) Analyzer() *analysis.Analyzer {
|
||||
return t.analyzer
|
||||
}
|
||||
|
||||
func (t *TextField) Value() []byte {
|
||||
return t.value
|
||||
}
|
||||
|
|
|
@ -37,6 +37,12 @@ var geoTolerance = 1E-6
|
|||
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
|
||||
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
|
||||
|
||||
// Point represents a geo point.
|
||||
type Point struct {
|
||||
Lon float64
|
||||
Lat float64
|
||||
}
|
||||
|
||||
// MortonHash computes the morton hash value for the provided geo point
|
||||
// This point is ordered as lon, lat.
|
||||
func MortonHash(lon, lat float64) uint64 {
|
||||
|
@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func BoundingRectangleForPolygon(polygon []Point) (
|
||||
float64, float64, float64, float64, error) {
|
||||
err := checkLongitude(polygon[0].Lon)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
err = checkLatitude(polygon[0].Lat)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
maxY, minY := polygon[0].Lat, polygon[0].Lat
|
||||
maxX, minX := polygon[0].Lon, polygon[0].Lon
|
||||
for i := 1; i < len(polygon); i++ {
|
||||
err := checkLongitude(polygon[i].Lon)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
err = checkLatitude(polygon[i].Lat)
|
||||
if err != nil {
|
||||
return 0, 0, 0, 0, err
|
||||
}
|
||||
|
||||
maxY = math.Max(maxY, polygon[i].Lat)
|
||||
minY = math.Min(minY, polygon[i].Lat)
|
||||
|
||||
maxX = math.Max(maxX, polygon[i].Lon)
|
||||
minX = math.Min(minX, polygon[i].Lon)
|
||||
}
|
||||
|
||||
return minX, maxY, maxX, minY, nil
|
||||
}
|
||||
|
|
|
@ -1,32 +1,21 @@
|
|||
// The code here was obtained from:
|
||||
// https://github.com/mmcloughlin/geohash
|
||||
|
||||
// The MIT License (MIT)
|
||||
// Copyright (c) 2015 Michael McLoughlin
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
// This implementation is inspired by geohash-js
|
||||
// ref: https://github.com/davetroy/geohash-js
|
||||
|
||||
package geo
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
// encoding encapsulates an encoding defined by a given base32 alphabet.
|
||||
type encoding struct {
|
||||
enc string
|
||||
|
@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding {
|
|||
return e
|
||||
}
|
||||
|
||||
// Decode string into bits of a 64-bit word. The string s may be at most 12
|
||||
// characters.
|
||||
func (e *encoding) decode(s string) uint64 {
|
||||
x := uint64(0)
|
||||
for i := 0; i < len(s); i++ {
|
||||
x = (x << 5) | uint64(e.dec[s[i]])
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
// Encode bits of 64-bit word into a string.
|
||||
func (e *encoding) encode(x uint64) string {
|
||||
b := [12]byte{}
|
||||
for i := 0; i < 12; i++ {
|
||||
b[11-i] = e.enc[x&0x1f]
|
||||
x >>= 5
|
||||
}
|
||||
return string(b[:])
|
||||
}
|
||||
|
||||
// Base32Encoding with the Geohash alphabet.
|
||||
// base32encoding with the Geohash alphabet.
|
||||
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
|
||||
|
||||
// BoundingBox returns the region encoded by the given string geohash.
|
||||
func geoBoundingBox(hash string) geoBox {
|
||||
bits := uint(5 * len(hash))
|
||||
inthash := base32encoding.decode(hash)
|
||||
return geoBoundingBoxIntWithPrecision(inthash, bits)
|
||||
}
|
||||
var masks = []uint64{16, 8, 4, 2, 1}
|
||||
|
||||
// Box represents a rectangle in latitude/longitude space.
|
||||
type geoBox struct {
|
||||
minLat float64
|
||||
maxLat float64
|
||||
minLng float64
|
||||
maxLng float64
|
||||
}
|
||||
// DecodeGeoHash decodes the string geohash faster with
|
||||
// higher precision. This API is in an experimental phase.
|
||||
func DecodeGeoHash(geoHash string) (float64, float64) {
|
||||
even := true
|
||||
lat := []float64{-90.0, 90.0}
|
||||
lon := []float64{-180.0, 180.0}
|
||||
|
||||
// Round returns a point inside the box, making an effort to round to minimal
|
||||
// precision.
|
||||
func (b geoBox) round() (lat, lng float64) {
|
||||
x := maxDecimalPower(b.maxLat - b.minLat)
|
||||
lat = math.Ceil(b.minLat/x) * x
|
||||
x = maxDecimalPower(b.maxLng - b.minLng)
|
||||
lng = math.Ceil(b.minLng/x) * x
|
||||
return
|
||||
}
|
||||
|
||||
// precalculated for performance
|
||||
var exp232 = math.Exp2(32)
|
||||
|
||||
// errorWithPrecision returns the error range in latitude and longitude for an
|
||||
// integer geohash with bits of precision.
|
||||
func errorWithPrecision(bits uint) (latErr, lngErr float64) {
|
||||
b := int(bits)
|
||||
latBits := b / 2
|
||||
lngBits := b - latBits
|
||||
latErr = math.Ldexp(180.0, -latBits)
|
||||
lngErr = math.Ldexp(360.0, -lngBits)
|
||||
return
|
||||
}
|
||||
|
||||
// minDecimalPlaces returns the minimum number of decimal places such that
|
||||
// there must exist a number with that many places within any range of width
|
||||
// r. This is intended for returning minimal precision coordinates inside a
|
||||
// box.
|
||||
func maxDecimalPower(r float64) float64 {
|
||||
m := int(math.Floor(math.Log10(r)))
|
||||
return math.Pow10(m)
|
||||
}
|
||||
|
||||
// Encode the position of x within the range -r to +r as a 32-bit integer.
|
||||
func encodeRange(x, r float64) uint32 {
|
||||
p := (x + r) / (2 * r)
|
||||
return uint32(p * exp232)
|
||||
}
|
||||
|
||||
// Decode the 32-bit range encoding X back to a value in the range -r to +r.
|
||||
func decodeRange(X uint32, r float64) float64 {
|
||||
p := float64(X) / exp232
|
||||
x := 2*r*p - r
|
||||
return x
|
||||
}
|
||||
|
||||
// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are
|
||||
// ignored, and may take any value.
|
||||
func squash(X uint64) uint32 {
|
||||
X &= 0x5555555555555555
|
||||
X = (X | (X >> 1)) & 0x3333333333333333
|
||||
X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f
|
||||
X = (X | (X >> 4)) & 0x00ff00ff00ff00ff
|
||||
X = (X | (X >> 8)) & 0x0000ffff0000ffff
|
||||
X = (X | (X >> 16)) & 0x00000000ffffffff
|
||||
return uint32(X)
|
||||
}
|
||||
|
||||
// Deinterleave the bits of X into 32-bit words containing the even and odd
|
||||
// bitlevels of X, respectively.
|
||||
func deinterleave(X uint64) (uint32, uint32) {
|
||||
return squash(X), squash(X >> 1)
|
||||
}
|
||||
|
||||
// BoundingBoxIntWithPrecision returns the region encoded by the integer
|
||||
// geohash with the specified precision.
|
||||
func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
|
||||
fullHash := hash << (64 - bits)
|
||||
latInt, lngInt := deinterleave(fullHash)
|
||||
lat := decodeRange(latInt, 90)
|
||||
lng := decodeRange(lngInt, 180)
|
||||
latErr, lngErr := errorWithPrecision(bits)
|
||||
return geoBox{
|
||||
minLat: lat,
|
||||
maxLat: lat + latErr,
|
||||
minLng: lng,
|
||||
maxLng: lng + lngErr,
|
||||
for i := 0; i < len(geoHash); i++ {
|
||||
cd := uint64(base32encoding.dec[geoHash[i]])
|
||||
for j := 0; j < 5; j++ {
|
||||
if even {
|
||||
if cd&masks[j] > 0 {
|
||||
lon[0] = (lon[0] + lon[1]) / 2
|
||||
} else {
|
||||
lon[1] = (lon[0] + lon[1]) / 2
|
||||
}
|
||||
} else {
|
||||
if cd&masks[j] > 0 {
|
||||
lat[0] = (lat[0] + lat[1]) / 2
|
||||
} else {
|
||||
lat[1] = (lat[0] + lat[1]) / 2
|
||||
}
|
||||
}
|
||||
even = !even
|
||||
}
|
||||
}
|
||||
|
||||
return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
func EncodeGeoHash(lat, lon float64) string {
|
||||
even := true
|
||||
lats := []float64{-90.0, 90.0}
|
||||
lons := []float64{-180.0, 180.0}
|
||||
precision := 12
|
||||
var ch, bit uint64
|
||||
var geoHash string
|
||||
|
||||
// Decode the string geohash to a (lat, lng) point.
|
||||
func GeoHashDecode(hash string) (lat, lng float64) {
|
||||
box := geoBoundingBox(hash)
|
||||
return box.round()
|
||||
for len(geoHash) < precision {
|
||||
if even {
|
||||
mid := (lons[0] + lons[1]) / 2
|
||||
if lon > mid {
|
||||
ch |= masks[bit]
|
||||
lons[0] = mid
|
||||
} else {
|
||||
lons[1] = mid
|
||||
}
|
||||
} else {
|
||||
mid := (lats[0] + lats[1]) / 2
|
||||
if lat > mid {
|
||||
ch |= masks[bit]
|
||||
lats[0] = mid
|
||||
} else {
|
||||
lats[1] = mid
|
||||
}
|
||||
}
|
||||
even = !even
|
||||
if bit < 4 {
|
||||
bit++
|
||||
} else {
|
||||
geoHash += string(base32encoding.enc[ch])
|
||||
ch = 0
|
||||
bit = 0
|
||||
}
|
||||
}
|
||||
|
||||
return geoHash
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
|
|||
}
|
||||
} else {
|
||||
// geohash
|
||||
lat, lon = GeoHashDecode(geoStr)
|
||||
lat, lon = DecodeGeoHash(geoStr)
|
||||
foundLat = true
|
||||
foundLon = true
|
||||
}
|
||||
|
|
|
@ -117,6 +117,8 @@ func (b *Batch) String() string {
|
|||
// be re-used in the future.
|
||||
func (b *Batch) Reset() {
|
||||
b.internal.Reset()
|
||||
b.lastDocSize = 0
|
||||
b.totalSize = 0
|
||||
}
|
||||
|
||||
func (b *Batch) Merge(o *Batch) {
|
||||
|
|
|
@ -121,6 +121,10 @@ type IndexReaderOnly interface {
|
|||
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
|
||||
}
|
||||
|
||||
type IndexReaderContains interface {
|
||||
FieldDictContains(field string) (FieldDictContains, error)
|
||||
}
|
||||
|
||||
// FieldTerms contains the terms used by a document, keyed by field
|
||||
type FieldTerms map[string][]string
|
||||
|
||||
|
@ -230,6 +234,10 @@ type FieldDict interface {
|
|||
Close() error
|
||||
}
|
||||
|
||||
type FieldDictContains interface {
|
||||
Contains(key []byte) (bool, error)
|
||||
}
|
||||
|
||||
// DocIDReader is the interface exposing enumeration of documents identifiers.
|
||||
// Close the reader to release associated resources.
|
||||
type DocIDReader interface {
|
||||
|
|
|
@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
fileSegments++
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// before the newMerge introduction, need to clean the newly
|
||||
|
@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// In the case where all the docs in the newly merged segment have been
|
||||
// deleted by the time we reach here, we can skip the introduction.
|
||||
if nextMerge.new != nil &&
|
||||
|
@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
|
||||
|
||||
newSnapshot.updateSize()
|
||||
|
||||
s.rootLock.Lock()
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
|
@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
|
|||
}
|
||||
|
||||
newSnapshot.updateSize()
|
||||
|
||||
// swap in new snapshot
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
|
@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
|
||||
|
||||
// process tasks in serial for now
|
||||
var notifications []chan *IndexSnapshot
|
||||
var filenames []string
|
||||
for _, task := range resultMergePlan.Tasks {
|
||||
if len(task.Segments) == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
|
||||
|
@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
segmentsToMerge = append(segmentsToMerge, zapSeg)
|
||||
docsToDrop = append(docsToDrop, segSnapshot.deleted)
|
||||
}
|
||||
// track the files getting merged for unsetting the
|
||||
// removal ineligibility. This helps to unflip files
|
||||
// even with fast merger, slow persister work flows.
|
||||
path := zapSeg.Path()
|
||||
filenames = append(filenames,
|
||||
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
return err
|
||||
}
|
||||
err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment))
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
return fmt.Errorf("merge validation failed: %v", err)
|
||||
}
|
||||
oldNewDocNums = make(map[uint64][]uint64)
|
||||
for i, segNewDocNums := range newDocNums {
|
||||
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
|
||||
|
@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
}
|
||||
}
|
||||
|
||||
// once all the newly merged segment introductions are done,
|
||||
// it's safe to unflip the removal ineligibility for the replaced
|
||||
// older segments
|
||||
for _, f := range filenames {
|
||||
s.unmarkIneligibleForRemoval(f)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
|||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return nil, 0, err
|
||||
}
|
||||
err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment))
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err)
|
||||
}
|
||||
|
||||
// update persisted stats
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
|
||||
|
|
|
@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() {
|
|||
var persistWatchers []*epochWatcher
|
||||
var lastPersistedEpoch, lastMergedEpoch uint64
|
||||
var ew *epochWatcher
|
||||
|
||||
var unpersistedCallbacks []index.BatchCallback
|
||||
|
||||
po, err := s.parsePersisterOptions()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
|
||||
|
@ -111,7 +114,6 @@ OUTER:
|
|||
if ew != nil && ew.epoch > lastMergedEpoch {
|
||||
lastMergedEpoch = ew.epoch
|
||||
}
|
||||
|
||||
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
|
||||
lastMergedEpoch, persistWatchers, po)
|
||||
|
||||
|
@ -150,11 +152,25 @@ OUTER:
|
|||
_ = ourSnapshot.DecRef()
|
||||
break OUTER
|
||||
}
|
||||
|
||||
// save this current snapshot's persistedCallbacks, to invoke during
|
||||
// the retry attempt
|
||||
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
|
||||
|
||||
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if unpersistedCallbacks != nil {
|
||||
// in the event of this being a retry attempt for persisting a snapshot
|
||||
// that had earlier failed, prepend the persistedCallbacks associated
|
||||
// with earlier segment(s) to the latest persistedCallbacks
|
||||
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
|
||||
unpersistedCallbacks = nil
|
||||
}
|
||||
|
||||
for i := range ourPersistedCallbacks {
|
||||
ourPersistedCallbacks[i](err)
|
||||
}
|
||||
|
@ -179,7 +195,6 @@ OUTER:
|
|||
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
|
||||
|
||||
if changed {
|
||||
s.removeOldData()
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
|
|||
return watchersNext
|
||||
}
|
||||
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
|
||||
persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) {
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
|
||||
lastMergedEpoch uint64, persistWatchers []*epochWatcher,
|
||||
po *persisterOptions) (uint64, []*epochWatcher) {
|
||||
|
||||
// first, let the watchers proceed if they lag behind
|
||||
// First, let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
|
||||
// check the merger lag by counting the segment files on disk,
|
||||
// Check the merger lag by counting the segment files on disk,
|
||||
numFilesOnDisk, _ := s.diskFileStats()
|
||||
|
||||
// On finding fewer files on disk, persister takes a short pause
|
||||
// for sufficient in-memory segments to pile up for the next
|
||||
// memory merge cum persist loop.
|
||||
// On finding too many files on disk, persister pause until the merger
|
||||
// catches up to reduce the segment file count under the threshold.
|
||||
// But if there is memory pressure, then skip this sleep maneuvers.
|
||||
numFilesOnDisk, _ := s.diskFileStats()
|
||||
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
|
||||
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
|
||||
select {
|
||||
|
@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM
|
|||
return lastMergedEpoch, persistWatchers
|
||||
}
|
||||
|
||||
// Finding too many files on disk could be due to two reasons.
|
||||
// 1. Too many older snapshots awaiting the clean up.
|
||||
// 2. The merger could be lagging behind on merging the disk files.
|
||||
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
|
||||
s.removeOldData()
|
||||
numFilesOnDisk, _ = s.diskFileStats()
|
||||
}
|
||||
|
||||
// The persister pauses until the merger catches up and reduces the segment
|
||||
// file count under the threshold.
|
||||
// But if there is memory pressure, then skip these sleep maneuvers.
|
||||
OUTER:
|
||||
for po.PersisterNapUnderNumFiles > 0 &&
|
||||
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
|
||||
|
@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
|
|||
}
|
||||
|
||||
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
||||
|
||||
rv := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: make(map[string][]byte),
|
||||
refs: 1,
|
||||
creator: "loadSnapshot",
|
||||
}
|
||||
|
||||
var running uint64
|
||||
c := snapshot.Cursor()
|
||||
for k, _ := c.First(); k != nil; k, _ = c.Next() {
|
||||
|
@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
|||
running += segmentSnapshot.segment.Count()
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
|
@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() {
|
|||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
|
||||
|
||||
if removed > 0 {
|
||||
err = s.removeOldZapFiles()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
|
||||
}
|
||||
err = s.removeOldZapFiles()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,12 +41,14 @@ const Version uint8 = 2
|
|||
var ErrClosed = fmt.Errorf("scorch closed")
|
||||
|
||||
type Scorch struct {
|
||||
nextSegmentID uint64
|
||||
stats Stats
|
||||
iStats internalStats
|
||||
|
||||
readOnly bool
|
||||
version uint8
|
||||
config map[string]interface{}
|
||||
analysisQueue *index.AnalysisQueue
|
||||
stats Stats
|
||||
nextSegmentID uint64
|
||||
path string
|
||||
|
||||
unsafeBatch bool
|
||||
|
@ -73,8 +75,6 @@ type Scorch struct {
|
|||
onEvent func(event Event)
|
||||
onAsyncError func(err error)
|
||||
|
||||
iStats internalStats
|
||||
|
||||
pauseLock sync.RWMutex
|
||||
|
||||
pauseCount uint64
|
||||
|
@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
|||
|
||||
// FIXME could sort ids list concurrent with analysis?
|
||||
|
||||
if len(batch.IndexOps) > 0 {
|
||||
if numUpdates > 0 {
|
||||
go func() {
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
|
@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} {
|
|||
m["CurOnDiskBytes"] = numBytesUsedDisk
|
||||
m["CurOnDiskFiles"] = numFilesOnDisk
|
||||
|
||||
s.rootLock.RLock()
|
||||
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
|
||||
s.rootLock.RUnlock()
|
||||
// TODO: consider one day removing these backwards compatible
|
||||
// names for apps using the old names
|
||||
m["updates"] = m["TotUpdates"]
|
||||
|
|
|
@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
|
|||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
// Contains always reports false with a nil error, whatever key is passed.
func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// EmptyDictionaryIterator is a stateless dictionary iterator that never
// yields an entry.
type EmptyDictionaryIterator struct{}
|
||||
|
||||
// Next always returns a nil entry and a nil error.
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Contains always reports false with a nil error, whatever key is passed.
func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Advance ignores its argument and always returns a nil Posting and a nil
// error.
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
|
|
@ -19,7 +19,10 @@
|
|||
|
||||
package segment
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const (
|
||||
MaxVarintSize = 9
|
||||
|
@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
|
|||
}
|
||||
return b[length:], v, nil
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
|
||||
// MemUvarintReader decodes unsigned varints straight out of an in-memory
// byte slice, tracking its own read cursor.
type MemUvarintReader struct {
	C int // index of next byte to read from S
	S []byte
}

// NewMemUvarintReader returns a reader positioned at the start of s.
func NewMemUvarintReader(s []byte) *MemUvarintReader {
	return &MemUvarintReader{S: s}
}

// Len returns the number of unread bytes.
func (r *MemUvarintReader) Len() int {
	n := len(r.S) - r.C
	if n < 0 {
		return 0
	}
	return n
}

// ErrMemUvarintReaderOverflow is returned by ReadUvarint when the encoded
// value does not fit in a uint64.
var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")

// ErrMemUvarintReaderEOF is returned by ReadUvarint when the data ends
// before a complete varint has been read.
var ErrMemUvarintReaderEOF = errors.New("MemUvarintReader unexpected end of data")

// ReadUvarint reads an encoded uint64. The original code this was
// based on is at encoding/binary/ReadUvarint().
//
// It returns ErrMemUvarintReaderEOF when the buffer is exhausted before a
// terminating byte is seen (the cursor is left where it was), and
// ErrMemUvarintReaderOverflow when the value overflows 64 bits.
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
	var x uint64
	var s uint
	var C = r.C
	var S = r.S

	for {
		// Truncated or corrupt data must surface as an error, not as an
		// index-out-of-range panic. The unsigned compare also rejects a
		// negative cursor.
		if uint(C) >= uint(len(S)) {
			return 0, ErrMemUvarintReaderEOF
		}

		b := S[C]
		C++

		if b < 0x80 {
			r.C = C

			// why 63? The original code had an 'i += 1' loop var and
			// checked for i > 9 || i == 9 ...; but, we no longer
			// check for the i var, but instead check here for s,
			// which is incremented by 7. So, 7*9 == 63.
			//
			// why the "extra" >= check? The normal case is that s <
			// 63, so we check this single >= guard first so that we
			// hit the normal, nil-error return pathway sooner.
			if s >= 63 && (s > 63 || s == 63 && b > 1) {
				return 0, ErrMemUvarintReaderOverflow
			}

			return x | uint64(b)<<s, nil
		}

		x |= uint64(b&0x7f) << s
		s += 7
	}
}

// SkipUvarint skips ahead one encoded uint64. If the data ends before the
// terminating byte, the cursor stops at the end of the buffer instead of
// running past it.
func (r *MemUvarintReader) SkipUvarint() {
	for r.C >= 0 && r.C < len(r.S) {
		b := r.S[r.C]
		r.C++

		if b < 0x80 {
			return
		}
	}
}

// SkipBytes skips a count number of bytes. The resulting cursor is clamped
// to [0, len(S)], so an oversized or negative count (for example one
// converted from a corrupt uint64 length) cannot leave the reader in a
// state where Len and the read methods disagree.
func (r *MemUvarintReader) SkipBytes(count int) {
	// Compare against the remaining length rather than adding first, so a
	// huge count cannot overflow int.
	if count > len(r.S)-r.C {
		r.C = len(r.S)
		return
	}

	next := r.C + count
	if next < 0 {
		next = 0
	}
	r.C = next
}

// Reset points the reader at s and rewinds the cursor to its start.
func (r *MemUvarintReader) Reset(s []byte) {
	r.C = 0
	r.S = s
}
|
||||
|
|
|
@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string {
|
|||
s = s.Sub[0]
|
||||
}
|
||||
|
||||
if s.Op == syntax.OpLiteral {
|
||||
if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
|
||||
return string(s.Rune)
|
||||
}
|
||||
|
||||
|
|
|
@ -59,6 +59,8 @@ type TermDictionary interface {
|
|||
AutomatonIterator(a vellum.Automaton,
|
||||
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
|
||||
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
|
||||
|
||||
Contains(key []byte) (bool, error)
|
||||
}
|
||||
|
||||
type DictionaryIterator interface {
|
||||
|
|
|
@ -16,6 +16,7 @@ package zap
|
|||
|
||||
import (
|
||||
"bufio"
|
||||
"github.com/couchbase/vellum"
|
||||
"math"
|
||||
"os"
|
||||
)
|
||||
|
@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
|
|||
docValueOffset: docValueOffset,
|
||||
dictLocs: dictLocs,
|
||||
fieldDvReaders: make(map[uint16]*docValueReader),
|
||||
fieldFSTs: make(map[uint16]*vellum.FST),
|
||||
}
|
||||
sb.updateSize()
|
||||
|
||||
|
|
|
@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap)
|
|||
return rv
|
||||
}
|
||||
|
||||
func (d *Dictionary) Contains(key []byte) (bool, error) {
|
||||
return d.fst.Contains(key)
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this dictionary
|
||||
func (d *Dictionary) Iterator() segment.DictionaryIterator {
|
||||
rv := &DictionaryIterator{
|
||||
|
@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
|
|||
}
|
||||
|
||||
// need to increment the end position to be inclusive
|
||||
endBytes := []byte(end)
|
||||
if endBytes[len(endBytes)-1] < 0xff {
|
||||
endBytes[len(endBytes)-1]++
|
||||
} else {
|
||||
endBytes = append(endBytes, 0xff)
|
||||
var endBytes []byte
|
||||
if len(end) > 0 {
|
||||
endBytes = []byte(end)
|
||||
if endBytes[len(endBytes)-1] < 0xff {
|
||||
endBytes[len(endBytes)-1]++
|
||||
} else {
|
||||
endBytes = append(endBytes, 0xff)
|
||||
}
|
||||
}
|
||||
|
||||
if d.fst != nil {
|
||||
|
|
|
@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error
|
|||
|
||||
type docVisitState struct {
|
||||
dvrs map[uint16]*docValueReader
|
||||
segment *Segment
|
||||
segment *SegmentBase
|
||||
}
|
||||
|
||||
type docValueReader struct {
|
||||
|
@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
|
|||
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
|
||||
// get the docValue offset for the given fields
|
||||
if fieldDvLocStart == fieldNotUninverted {
|
||||
return nil, fmt.Errorf("loadFieldDocValueReader: "+
|
||||
"no docValues found for field: %s", field)
|
||||
// no docValues found, nothing to do
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// read the number of chunks, and chunk offsets position
|
||||
|
@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
|
|||
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
|
||||
// acquire position of chunk offsets
|
||||
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
|
||||
} else {
|
||||
return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart)
|
||||
}
|
||||
|
||||
fdvIter := &docValueReader{
|
||||
|
@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
|
|||
|
||||
// VisitDocumentFieldTerms is an implementation of the
|
||||
// DocumentFieldTermVisitable interface
|
||||
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||
func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||
visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
|
||||
segment.DocVisitState, error) {
|
||||
dvs, ok := dvsIn.(*docVisitState)
|
||||
|
@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
|||
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
|
||||
// check if the chunk is already loaded
|
||||
if docInChunk != dvr.curChunkNumber() {
|
||||
err := dvr.loadDvChunk(docInChunk, &s.SegmentBase)
|
||||
err := dvr.loadDvChunk(docInChunk, s)
|
||||
if err != nil {
|
||||
return dvs, err
|
||||
}
|
||||
|
@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
|||
// VisitableDocValueFields returns the list of fields with
|
||||
// persisted doc value terms ready to be visitable using the
|
||||
// VisitDocumentFieldTerms method.
|
||||
func (s *Segment) VisitableDocValueFields() ([]string, error) {
|
||||
func (s *SegmentBase) VisitableDocValueFields() ([]string, error) {
|
||||
return s.fieldDvNames, nil
|
||||
}
|
||||
|
|
|
@ -31,6 +31,14 @@ import (
|
|||
|
||||
var DefaultFileMergerBufferSize = 1024 * 1024
|
||||
|
||||
// ValidateMerge can be set by applications to perform additional checks
|
||||
// on a new segment produced by a merge. By default it does nothing and returns nil.
|
||||
// Caller should provide EITHER segments or memSegments, but not both
// (the unused one is passed as nil).
|
||||
// This API is experimental and may be removed at any time.
|
||||
var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
|
||||
|
||||
// Merge takes a slice of zap segments and bit masks describing which
|
||||
|
|
|
@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100
|
|||
var NewSegmentBufferNumResultsFactor float64 = 1.0
|
||||
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
|
||||
|
||||
// ValidateDocFields can be set by applications to perform additional checks
|
||||
// on fields in a document being added to a new segment. By default it does
|
||||
// nothing and returns nil.
|
||||
// This API is experimental and may be removed at any time.
|
||||
var ValidateDocFields = func(field document.Field) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
|
||||
// SegmentBase from analysis results
|
||||
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
|
||||
|
@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() (
|
|||
if opts.IncludeDocValues() {
|
||||
s.IncludeDocValues[fieldID] = true
|
||||
}
|
||||
|
||||
err := ValidateDocFields(field)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
var curr int
|
||||
|
|
|
@ -15,10 +15,8 @@
|
|||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
|
@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
|
|||
}
|
||||
|
||||
rv.postings = p
|
||||
rv.includeFreqNorm = includeFreq || includeNorm
|
||||
rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
|
||||
rv.includeLocs = includeLocs
|
||||
|
||||
if p.normBits1Hit != 0 {
|
||||
|
@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
|
|||
|
||||
// Count returns the number of items on this postings list
|
||||
func (p *PostingsList) Count() uint64 {
|
||||
var n uint64
|
||||
var n, e uint64
|
||||
if p.normBits1Hit != 0 {
|
||||
n = 1
|
||||
if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
|
||||
e = 1
|
||||
}
|
||||
} else if p.postings != nil {
|
||||
n = p.postings.GetCardinality()
|
||||
}
|
||||
var e uint64
|
||||
if p.except != nil {
|
||||
e = p.except.GetCardinality()
|
||||
}
|
||||
if n <= e {
|
||||
return 0
|
||||
if p.except != nil {
|
||||
e = p.postings.AndCardinality(p.except)
|
||||
}
|
||||
}
|
||||
return n - e
|
||||
}
|
||||
|
@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error {
|
|||
// PostingsIterator provides a way to iterate through the postings list
|
||||
type PostingsIterator struct {
|
||||
postings *PostingsList
|
||||
all roaring.IntIterable
|
||||
Actual roaring.IntIterable
|
||||
all roaring.IntPeekable
|
||||
Actual roaring.IntPeekable
|
||||
ActualBM *roaring.Bitmap
|
||||
|
||||
currChunk uint32
|
||||
currChunkFreqNorm []byte
|
||||
currChunkLoc []byte
|
||||
|
||||
freqNormReader *bytes.Reader
|
||||
locReader *bytes.Reader
|
||||
freqNormReader *segment.MemUvarintReader
|
||||
locReader *segment.MemUvarintReader
|
||||
|
||||
freqChunkOffsets []uint64
|
||||
freqChunkStart uint64
|
||||
|
@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
|||
end += e
|
||||
i.currChunkFreqNorm = i.postings.sb.mem[start:end]
|
||||
if i.freqNormReader == nil {
|
||||
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
|
||||
i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm)
|
||||
} else {
|
||||
i.freqNormReader.Reset(i.currChunkFreqNorm)
|
||||
}
|
||||
|
@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
|||
end += e
|
||||
i.currChunkLoc = i.postings.sb.mem[start:end]
|
||||
if i.locReader == nil {
|
||||
i.locReader = bytes.NewReader(i.currChunkLoc)
|
||||
i.locReader = segment.NewMemUvarintReader(i.currChunkLoc)
|
||||
} else {
|
||||
i.locReader.Reset(i.currChunkLoc)
|
||||
}
|
||||
|
@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
|
|||
return 1, i.normBits1Hit, false, nil
|
||||
}
|
||||
|
||||
freqHasLocs, err := binary.ReadUvarint(i.freqNormReader)
|
||||
freqHasLocs, err := i.freqNormReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
|
||||
}
|
||||
|
||||
freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
|
||||
|
||||
normBits, err := binary.ReadUvarint(i.freqNormReader)
|
||||
normBits, err := i.freqNormReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
|
||||
}
|
||||
|
||||
return freq, normBits, hasLocs, err
|
||||
return freq, normBits, hasLocs, nil
|
||||
}
|
||||
|
||||
// skipFreqNormReadHasLocs advances the freq/norm reader past one posting's
// freq and norm values, returning only whether that posting has locations.
func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
|
||||
// When normBits1Hit is set nothing is read from the reader and the
// posting is reported as having no locations.
if i.normBits1Hit != 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// The first varint carries the hasLocs flag, so it has to be decoded
// rather than skipped.
freqHasLocs, err := i.freqNormReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error reading freqHasLocs: %v", err)
|
||||
}
|
||||
|
||||
i.freqNormReader.SkipUvarint() // Skip normBits.
|
||||
|
||||
return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
|
||||
}
|
||||
|
||||
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
|
||||
|
@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
|
|||
}
|
||||
|
||||
// readLocation processes all the integers on the stream representing a single
|
||||
// location. if you care about it, pass in a non-nil location struct, and we
|
||||
// will fill it. if you don't care about it, pass in nil and we safely consume
|
||||
// the contents.
|
||||
// location.
|
||||
func (i *PostingsIterator) readLocation(l *Location) error {
|
||||
// read off field
|
||||
fieldID, err := binary.ReadUvarint(i.locReader)
|
||||
fieldID, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location field: %v", err)
|
||||
}
|
||||
// read off pos
|
||||
pos, err := binary.ReadUvarint(i.locReader)
|
||||
pos, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location pos: %v", err)
|
||||
}
|
||||
// read off start
|
||||
start, err := binary.ReadUvarint(i.locReader)
|
||||
start, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location start: %v", err)
|
||||
}
|
||||
// read off end
|
||||
end, err := binary.ReadUvarint(i.locReader)
|
||||
end, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location end: %v", err)
|
||||
}
|
||||
// read off num array pos
|
||||
numArrayPos, err := binary.ReadUvarint(i.locReader)
|
||||
numArrayPos, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location num array pos: %v", err)
|
||||
}
|
||||
|
||||
// group these together for less branching
|
||||
if l != nil {
|
||||
l.field = i.postings.sb.fieldsInv[fieldID]
|
||||
l.pos = pos
|
||||
l.start = start
|
||||
l.end = end
|
||||
if cap(l.ap) < int(numArrayPos) {
|
||||
l.ap = make([]uint64, int(numArrayPos))
|
||||
} else {
|
||||
l.ap = l.ap[:int(numArrayPos)]
|
||||
}
|
||||
l.field = i.postings.sb.fieldsInv[fieldID]
|
||||
l.pos = pos
|
||||
l.start = start
|
||||
l.end = end
|
||||
|
||||
if cap(l.ap) < int(numArrayPos) {
|
||||
l.ap = make([]uint64, int(numArrayPos))
|
||||
} else {
|
||||
l.ap = l.ap[:int(numArrayPos)]
|
||||
}
|
||||
|
||||
// read off array positions
|
||||
for k := 0; k < int(numArrayPos); k++ {
|
||||
ap, err := binary.ReadUvarint(i.locReader)
|
||||
ap, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading array position: %v", err)
|
||||
}
|
||||
if l != nil {
|
||||
l.ap[k] = ap
|
||||
}
|
||||
|
||||
l.ap[k] = ap
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err
|
|||
}
|
||||
rv.locs = i.nextSegmentLocs[:0]
|
||||
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
numLocsBytes, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
|
||||
}
|
||||
|
@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() (
|
|||
if hasLocs {
|
||||
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
numLocsBytes, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, nil,
|
||||
fmt.Errorf("error reading location nextBytes numLocs: %v", err)
|
||||
}
|
||||
|
||||
// skip over all the location bytes
|
||||
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, nil, err
|
||||
}
|
||||
i.locReader.SkipBytes(int(numLocsBytes))
|
||||
|
||||
endLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||
bytesLoc = i.currChunkLoc[startLoc:endLoc]
|
||||
|
@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
|
|||
return i.nextDocNumAtOrAfterClean(atOrAfter)
|
||||
}
|
||||
|
||||
n := i.Actual.Next()
|
||||
for uint64(n) < atOrAfter && i.Actual.HasNext() {
|
||||
n = i.Actual.Next()
|
||||
}
|
||||
if uint64(n) < atOrAfter {
|
||||
i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
|
||||
|
||||
if !i.Actual.HasNext() {
|
||||
// couldn't find anything
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
n := i.Actual.Next()
|
||||
allN := i.all.Next()
|
||||
|
||||
nChunk := n / i.postings.sb.chunkFactor
|
||||
|
@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
|
|||
// no deletions) where the all bitmap is the same as the actual bitmap
|
||||
func (i *PostingsIterator) nextDocNumAtOrAfterClean(
|
||||
atOrAfter uint64) (uint64, bool, error) {
|
||||
n := i.Actual.Next()
|
||||
|
||||
if !i.includeFreqNorm {
|
||||
for uint64(n) < atOrAfter && i.Actual.HasNext() {
|
||||
n = i.Actual.Next()
|
||||
}
|
||||
i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
|
||||
|
||||
if uint64(n) < atOrAfter {
|
||||
if !i.Actual.HasNext() {
|
||||
return 0, false, nil // couldn't find anything
|
||||
}
|
||||
|
||||
return uint64(n), true, nil
|
||||
return uint64(i.Actual.Next()), true, nil
|
||||
}
|
||||
|
||||
// freq-norm's needed, so maintain freq-norm chunk reader
|
||||
sameChunkNexts := 0 // # of times we called Next() in the same chunk
|
||||
|
||||
n := i.Actual.Next()
|
||||
nChunk := n / i.postings.sb.chunkFactor
|
||||
|
||||
for uint64(n) < atOrAfter && i.Actual.HasNext() {
|
||||
|
@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
|
|||
}
|
||||
|
||||
// read off freq/offsets even though we don't care about them
|
||||
_, _, hasLocs, err := i.readFreqNormHasLocs()
|
||||
hasLocs, err := i.skipFreqNormReadHasLocs()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if i.includeLocs && hasLocs {
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
numLocsBytes, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location numLocsBytes: %v", err)
|
||||
}
|
||||
|
||||
// skip over all the location bytes
|
||||
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
i.locReader.SkipBytes(int(numLocsBytes))
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -20,8 +20,8 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"reflect"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
|
@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int
|
|||
|
||||
func init() {
|
||||
var sb SegmentBase
|
||||
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
|
||||
reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb))
|
||||
}
|
||||
|
||||
// Open returns a zap impl of a segment
|
||||
|
@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) {
|
|||
mem: mm[0 : len(mm)-FooterSize],
|
||||
fieldsMap: make(map[string]uint16),
|
||||
fieldDvReaders: make(map[uint16]*docValueReader),
|
||||
fieldFSTs: make(map[uint16]*vellum.FST),
|
||||
},
|
||||
f: f,
|
||||
mm: mm,
|
||||
|
@ -101,6 +102,9 @@ type SegmentBase struct {
|
|||
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
|
||||
fieldDvNames []string // field names cached in fieldDvReaders
|
||||
size uint64
|
||||
|
||||
m sync.Mutex
|
||||
fieldFSTs map[uint16]*vellum.FST
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) Size() int {
|
||||
|
@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
|
|||
|
||||
dictStart := sb.dictLocs[rv.fieldID]
|
||||
if dictStart > 0 {
|
||||
// read the length of the vellum data
|
||||
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
|
||||
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
|
||||
if fstBytes != nil {
|
||||
var ok bool
|
||||
sb.m.Lock()
|
||||
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
|
||||
// read the length of the vellum data
|
||||
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
|
||||
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
|
||||
rv.fst, err = vellum.Load(fstBytes)
|
||||
if err != nil {
|
||||
sb.m.Unlock()
|
||||
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
|
||||
}
|
||||
rv.fstReader, err = rv.fst.Reader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
|
||||
}
|
||||
|
||||
sb.fieldFSTs[rv.fieldID] = rv.fst
|
||||
}
|
||||
|
||||
sb.m.Unlock()
|
||||
rv.fstReader, err = rv.fst.Reader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) {
|
|||
}
|
||||
|
||||
func (s *SegmentBase) loadDvReaders() error {
|
||||
if s.docValueOffset == fieldNotUninverted {
|
||||
if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error {
|
|||
}
|
||||
read += uint64(n)
|
||||
|
||||
fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
|
||||
fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if fieldDvReader != nil {
|
||||
s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
|
||||
s.fieldDvNames = append(s.fieldDvNames, field)
|
||||
|
|
|
@ -28,13 +28,14 @@ import (
|
|||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/couchbase/vellum"
|
||||
lev2 "github.com/couchbase/vellum/levenshtein2"
|
||||
lev "github.com/couchbase/vellum/levenshtein"
|
||||
)
|
||||
|
||||
// reusable, thread-safe Levenshtein automaton builders
|
||||
var lb1, lb2 *lev2.LevenshteinAutomatonBuilder
|
||||
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
|
||||
|
||||
type asynchSegmentResult struct {
|
||||
dict segment.TermDictionary
|
||||
dictItr segment.DictionaryIterator
|
||||
|
||||
index int
|
||||
|
@ -51,11 +52,11 @@ func init() {
|
|||
var is interface{} = IndexSnapshot{}
|
||||
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
|
||||
var err error
|
||||
lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true)
|
||||
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
|
||||
}
|
||||
lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true)
|
||||
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
|
||||
}
|
||||
|
@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() {
|
|||
}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
|
||||
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
|
||||
randomLookup bool) (*IndexSnapshotFieldDict, error) {
|
||||
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
|
@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
if err != nil {
|
||||
results <- &asynchSegmentResult{err: err}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
||||
if randomLookup {
|
||||
results <- &asynchSegmentResult{dict: dict}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
||||
}
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
if asr.err != nil && err == nil {
|
||||
err = asr.err
|
||||
} else {
|
||||
next, err2 := asr.dictItr.Next()
|
||||
if err2 != nil && err == nil {
|
||||
err = err2
|
||||
}
|
||||
if next != nil {
|
||||
if !randomLookup {
|
||||
next, err2 := asr.dictItr.Next()
|
||||
if err2 != nil && err == nil {
|
||||
err = err2
|
||||
}
|
||||
if next != nil {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
itr: asr.dictItr,
|
||||
curr: *next,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
itr: asr.dictItr,
|
||||
curr: *next,
|
||||
dict: asr.dict,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// prepare heap
|
||||
heap.Init(rv)
|
||||
|
||||
if !randomLookup {
|
||||
// prepare heap
|
||||
heap.Init(rv)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.Iterator()
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
|
||||
endTerm []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.RangeIterator(string(startTerm), string(endTerm))
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictPrefix(field string,
|
||||
termPrefix []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.PrefixIterator(string(termPrefix))
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRegexp(field string,
|
||||
|
@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string,
|
|||
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) getLevAutomaton(term string,
|
||||
|
@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string,
|
|||
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictOnly(field string,
|
||||
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.OnlyIterator(onlyTerms, includeCount)
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, nil, true)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
|
||||
|
|
|
@ -22,6 +22,7 @@ import (
|
|||
)
|
||||
|
||||
type segmentDictCursor struct {
|
||||
dict segment.TermDictionary
|
||||
itr segment.DictionaryIterator
|
||||
curr index.DictEntry
|
||||
}
|
||||
|
@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
|
|||
func (i *IndexSnapshotFieldDict) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
|
||||
if len(i.cursors) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
for _, cursor := range i.cursors {
|
||||
if found, _ := cursor.dict.Contains(key); found {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
|
|
@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
|
|||
}
|
||||
|
||||
type cachedDocs struct {
|
||||
size uint64
|
||||
m sync.Mutex // As the cache is asynchronously prepared, need a lock
|
||||
cache map[string]*cachedFieldDocs // Keyed by field
|
||||
size uint64
|
||||
}
|
||||
|
||||
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
|
||||
|
|
|
@ -107,6 +107,9 @@ type Stats struct {
|
|||
TotFileMergeIntroductionsDone uint64
|
||||
TotFileMergeIntroductionsSkipped uint64
|
||||
|
||||
CurFilesIneligibleForRemoval uint64
|
||||
TotSnapshotsRemovedFromMetaStore uint64
|
||||
|
||||
TotMemMergeBeg uint64
|
||||
TotMemMergeErr uint64
|
||||
TotMemMergeDone uint64
|
||||
|
|
|
@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
|
|||
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
||||
// do analysis before acquiring write lock
|
||||
analysisStart := time.Now()
|
||||
numPlainTextBytes := doc.NumPlainTextBytes()
|
||||
resultChan := make(chan *index.AnalysisResult)
|
||||
aw := index.NewAnalysisWork(udc, doc, resultChan)
|
||||
|
||||
|
@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
return udc.UpdateWithAnalysis(doc, result, backIndexRow)
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
|
||||
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
|
||||
// start a writer for this update
|
||||
indexStart := time.Now()
|
||||
var kvwriter store.KVWriter
|
||||
|
@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
|||
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
|
||||
if err == nil {
|
||||
atomic.AddUint64(&udc.stats.updates, 1)
|
||||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
|
||||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
|
||||
} else {
|
||||
atomic.AddUint64(&udc.stats.errors, 1)
|
||||
}
|
||||
|
@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
|
|||
}
|
||||
|
||||
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
||||
persistedCallback := batch.PersistedCallback()
|
||||
if persistedCallback != nil {
|
||||
defer persistedCallback(err)
|
||||
}
|
||||
analysisStart := time.Now()
|
||||
|
||||
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
|
||||
|
@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
}
|
||||
}
|
||||
|
||||
if len(batch.IndexOps) > 0 {
|
||||
if numUpdates > 0 {
|
||||
go func() {
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
|
@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
atomic.AddUint64(&udc.stats.errors, 1)
|
||||
}
|
||||
|
||||
persistedCallback := batch.PersistedCallback()
|
||||
if persistedCallback != nil {
|
||||
persistedCallback(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
|
|||
Sort: req.Sort.Copy(),
|
||||
IncludeLocations: req.IncludeLocations,
|
||||
Score: req.Score,
|
||||
SearchAfter: req.SearchAfter,
|
||||
SearchBefore: req.SearchBefore,
|
||||
}
|
||||
return &rv
|
||||
}
|
||||
|
@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
|
|||
searchStart := time.Now()
|
||||
asyncResults := make(chan *asyncSearchResult, len(indexes))
|
||||
|
||||
var reverseQueryExecution bool
|
||||
if req.SearchBefore != nil {
|
||||
reverseQueryExecution = true
|
||||
req.Sort.Reverse()
|
||||
req.SearchAfter = req.SearchBefore
|
||||
req.SearchBefore = nil
|
||||
}
|
||||
|
||||
// run search on each index in separate go routine
|
||||
var waitGroup sync.WaitGroup
|
||||
|
||||
|
@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
|
|||
|
||||
// sort all hits with the requested order
|
||||
if len(req.Sort) > 0 {
|
||||
sorter := newMultiSearchHitSorter(req.Sort, sr.Hits)
|
||||
sorter := newSearchHitSorter(req.Sort, sr.Hits)
|
||||
sort.Sort(sorter)
|
||||
}
|
||||
|
||||
|
@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
|
|||
sr.Facets.Fixup(name, fr.Size)
|
||||
}
|
||||
|
||||
if reverseQueryExecution {
|
||||
// reverse the sort back to the original
|
||||
req.Sort.Reverse()
|
||||
// resort using the original order
|
||||
mhs := newSearchHitSorter(req.Sort, sr.Hits)
|
||||
sort.Sort(mhs)
|
||||
// reset request
|
||||
req.SearchBefore = req.SearchAfter
|
||||
req.SearchAfter = nil
|
||||
}
|
||||
|
||||
// fix up original request
|
||||
sr.Request = req
|
||||
searchDuration := time.Since(searchStart)
|
||||
|
@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error {
|
|||
defer f.index.mutex.RUnlock()
|
||||
return f.fieldDict.Close()
|
||||
}
|
||||
|
||||
type multiSearchHitSorter struct {
|
||||
hits search.DocumentMatchCollection
|
||||
sort search.SortOrder
|
||||
cachedScoring []bool
|
||||
cachedDesc []bool
|
||||
}
|
||||
|
||||
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
|
||||
return &multiSearchHitSorter{
|
||||
sort: sort,
|
||||
hits: hits,
|
||||
cachedScoring: sort.CacheIsScore(),
|
||||
cachedDesc: sort.CacheDescending(),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *multiSearchHitSorter) Len() int { return len(m.hits) }
|
||||
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
|
||||
func (m *multiSearchHitSorter) Less(i, j int) bool {
|
||||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
|
||||
return c < 0
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)
|
||||
var reverseQueryExecution bool
|
||||
if req.SearchBefore != nil {
|
||||
reverseQueryExecution = true
|
||||
req.Sort.Reverse()
|
||||
req.SearchAfter = req.SearchBefore
|
||||
req.SearchBefore = nil
|
||||
}
|
||||
|
||||
var coll *collector.TopNCollector
|
||||
if req.SearchAfter != nil {
|
||||
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
|
||||
} else {
|
||||
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
|
||||
}
|
||||
|
||||
// open a reader for this search
|
||||
indexReader, err := i.i.Reader()
|
||||
|
@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
facetsBuilder.Add(facetName, facetBuilder)
|
||||
}
|
||||
}
|
||||
collector.SetFacetsBuilder(facetsBuilder)
|
||||
coll.SetFacetsBuilder(facetsBuilder)
|
||||
}
|
||||
|
||||
memNeeded := memNeededForSearch(req, searcher, collector)
|
||||
memNeeded := memNeededForSearch(req, searcher, coll)
|
||||
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
|
||||
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
|
||||
err = cbF(memNeeded)
|
||||
|
@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
}
|
||||
}
|
||||
|
||||
err = collector.Collect(ctx, searcher, indexReader)
|
||||
err = coll.Collect(ctx, searcher, indexReader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
hits := collector.Results()
|
||||
hits := coll.Results()
|
||||
|
||||
var highlighter highlight.Highlighter
|
||||
|
||||
|
@ -542,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
}
|
||||
|
||||
for _, hit := range hits {
|
||||
if len(req.Fields) > 0 || highlighter != nil {
|
||||
doc, err := indexReader.Document(hit.ID)
|
||||
if err == nil && doc != nil {
|
||||
if len(req.Fields) > 0 {
|
||||
fieldsToLoad := deDuplicate(req.Fields)
|
||||
for _, f := range fieldsToLoad {
|
||||
for _, docF := range doc.Fields {
|
||||
if f == "*" || docF.Name() == f {
|
||||
var value interface{}
|
||||
switch docF := docF.(type) {
|
||||
case *document.TextField:
|
||||
value = string(docF.Value())
|
||||
case *document.NumericField:
|
||||
num, err := docF.Number()
|
||||
if err == nil {
|
||||
value = num
|
||||
}
|
||||
case *document.DateTimeField:
|
||||
datetime, err := docF.DateTime()
|
||||
if err == nil {
|
||||
value = datetime.Format(time.RFC3339)
|
||||
}
|
||||
case *document.BooleanField:
|
||||
boolean, err := docF.Boolean()
|
||||
if err == nil {
|
||||
value = boolean
|
||||
}
|
||||
case *document.GeoPointField:
|
||||
lon, err := docF.Lon()
|
||||
if err == nil {
|
||||
lat, err := docF.Lat()
|
||||
if err == nil {
|
||||
value = []float64{lon, lat}
|
||||
}
|
||||
}
|
||||
}
|
||||
if value != nil {
|
||||
hit.AddFieldValue(docF.Name(), value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if highlighter != nil {
|
||||
highlightFields := req.Highlight.Fields
|
||||
if highlightFields == nil {
|
||||
// add all fields with matches
|
||||
highlightFields = make([]string, 0, len(hit.Locations))
|
||||
for k := range hit.Locations {
|
||||
highlightFields = append(highlightFields, k)
|
||||
}
|
||||
}
|
||||
for _, hf := range highlightFields {
|
||||
highlighter.BestFragmentsInField(hit, doc, hf, 1)
|
||||
}
|
||||
}
|
||||
} else if doc == nil {
|
||||
// unexpected case, a doc ID that was found as a search hit
|
||||
// was unable to be found during document lookup
|
||||
return nil, ErrorIndexReadInconsistency
|
||||
}
|
||||
}
|
||||
if i.name != "" {
|
||||
hit.Index = i.name
|
||||
}
|
||||
err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddUint64(&i.stats.searches, 1)
|
||||
|
@ -618,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
logger.Printf("slow search took %s - %v", searchDuration, req)
|
||||
}
|
||||
|
||||
if reverseQueryExecution {
|
||||
// reverse the sort back to the original
|
||||
req.Sort.Reverse()
|
||||
// resort using the original order
|
||||
mhs := newSearchHitSorter(req.Sort, hits)
|
||||
sort.Sort(mhs)
|
||||
// reset request
|
||||
req.SearchBefore = req.SearchAfter
|
||||
req.SearchAfter = nil
|
||||
}
|
||||
|
||||
return &SearchResult{
|
||||
Status: &SearchStatus{
|
||||
Total: 1,
|
||||
|
@ -625,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
|
|||
},
|
||||
Request: req,
|
||||
Hits: hits,
|
||||
Total: collector.Total(),
|
||||
MaxScore: collector.MaxScore(),
|
||||
Total: coll.Total(),
|
||||
MaxScore: coll.MaxScore(),
|
||||
Took: searchDuration,
|
||||
Facets: collector.FacetResults(),
|
||||
Facets: coll.FacetResults(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
|
||||
indexName string, r index.IndexReader,
|
||||
highlighter highlight.Highlighter) error {
|
||||
if len(req.Fields) > 0 || highlighter != nil {
|
||||
doc, err := r.Document(hit.ID)
|
||||
if err == nil && doc != nil {
|
||||
if len(req.Fields) > 0 {
|
||||
fieldsToLoad := deDuplicate(req.Fields)
|
||||
for _, f := range fieldsToLoad {
|
||||
for _, docF := range doc.Fields {
|
||||
if f == "*" || docF.Name() == f {
|
||||
var value interface{}
|
||||
switch docF := docF.(type) {
|
||||
case *document.TextField:
|
||||
value = string(docF.Value())
|
||||
case *document.NumericField:
|
||||
num, err := docF.Number()
|
||||
if err == nil {
|
||||
value = num
|
||||
}
|
||||
case *document.DateTimeField:
|
||||
datetime, err := docF.DateTime()
|
||||
if err == nil {
|
||||
value = datetime.Format(time.RFC3339)
|
||||
}
|
||||
case *document.BooleanField:
|
||||
boolean, err := docF.Boolean()
|
||||
if err == nil {
|
||||
value = boolean
|
||||
}
|
||||
case *document.GeoPointField:
|
||||
lon, err := docF.Lon()
|
||||
if err == nil {
|
||||
lat, err := docF.Lat()
|
||||
if err == nil {
|
||||
value = []float64{lon, lat}
|
||||
}
|
||||
}
|
||||
}
|
||||
if value != nil {
|
||||
hit.AddFieldValue(docF.Name(), value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if highlighter != nil {
|
||||
highlightFields := req.Highlight.Fields
|
||||
if highlightFields == nil {
|
||||
// add all fields with matches
|
||||
highlightFields = make([]string, 0, len(hit.Locations))
|
||||
for k := range hit.Locations {
|
||||
highlightFields = append(highlightFields, k)
|
||||
}
|
||||
}
|
||||
for _, hf := range highlightFields {
|
||||
highlighter.BestFragmentsInField(hit, doc, hf, 1)
|
||||
}
|
||||
}
|
||||
} else if doc == nil {
|
||||
// unexpected case, a doc ID that was found as a search hit
|
||||
// was unable to be found during document lookup
|
||||
return ErrorIndexReadInconsistency
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Fields returns the name of all the fields this
|
||||
// Index has operated on.
|
||||
func (i *indexImpl) Fields() (fields []string, err error) {
|
||||
|
@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string {
|
|||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
type searchHitSorter struct {
|
||||
hits search.DocumentMatchCollection
|
||||
sort search.SortOrder
|
||||
cachedScoring []bool
|
||||
cachedDesc []bool
|
||||
}
|
||||
|
||||
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
|
||||
return &searchHitSorter{
|
||||
sort: sort,
|
||||
hits: hits,
|
||||
cachedScoring: sort.CacheIsScore(),
|
||||
cachedDesc: sort.CacheDescending(),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *searchHitSorter) Len() int { return len(m.hits) }
|
||||
func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
|
||||
func (m *searchHitSorter) Less(i, j int) bool {
|
||||
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
|
||||
return c < 0
|
||||
}
|
||||
|
|
|
@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
|||
if !propertyValue.IsNil() {
|
||||
switch property := property.(type) {
|
||||
case encoding.TextMarshaler:
|
||||
|
||||
txt, err := property.MarshalText()
|
||||
if err == nil && subDocMapping != nil {
|
||||
// index by explicit mapping
|
||||
// ONLY process TextMarshaler if there is an explicit mapping
|
||||
// AND all of the fields are of type text
|
||||
// OTHERWISE process field without TextMarshaler
|
||||
if subDocMapping != nil {
|
||||
allFieldsText := true
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
if fieldMapping.Type == "text" {
|
||||
fieldMapping.processString(string(txt), pathString, path, indexes, context)
|
||||
if fieldMapping.Type != "text" {
|
||||
allFieldsText = false
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
txt, err := property.MarshalText()
|
||||
if err == nil && allFieldsText {
|
||||
txtStr := string(txt)
|
||||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
fieldMapping.processString(txtStr, pathString, path, indexes, context)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
default:
|
||||
dm.walkDocument(property, path, indexes, context)
|
||||
}
|
||||
|
|
|
@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20
|
|||
type PrefixCoded []byte
|
||||
|
||||
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
|
||||
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
|
||||
rv PrefixCoded, preallocRest []byte, err error) {
|
||||
if shift > 63 {
|
||||
return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
|
||||
return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
|
||||
}
|
||||
|
||||
nChars := ((63 - shift) / 7) + 1
|
||||
rv := make(PrefixCoded, nChars+1)
|
||||
|
||||
size := int(nChars + 1)
|
||||
if len(prealloc) >= size {
|
||||
rv = PrefixCoded(prealloc[0:size])
|
||||
preallocRest = prealloc[size:]
|
||||
} else {
|
||||
rv = make(PrefixCoded, size)
|
||||
}
|
||||
|
||||
rv[0] = ShiftStartInt64 + byte(shift)
|
||||
|
||||
sortableBits := int64(uint64(in) ^ 0x8000000000000000)
|
||||
|
@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
|
|||
nChars--
|
||||
sortableBits = int64(uint64(sortableBits) >> 7)
|
||||
}
|
||||
return rv, nil
|
||||
|
||||
return rv, preallocRest, nil
|
||||
}
|
||||
|
||||
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {
|
||||
|
|
|
@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) {
|
|||
// result score explanations.
|
||||
// Sort describes the desired order for the results to be returned.
|
||||
// Score controls the kind of scoring performed
|
||||
// SearchAfter supports deep paging by providing a minimum sort key
|
||||
// SearchBefore supports deep paging by providing a maximum sort key
|
||||
//
|
||||
// A special field named "*" can be used to return all fields.
|
||||
type SearchRequest struct {
|
||||
|
@ -275,6 +277,8 @@ type SearchRequest struct {
|
|||
Sort search.SortOrder `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score,omitempty"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
}
|
||||
|
||||
func (r *SearchRequest) Validate() error {
|
||||
|
@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error {
|
|||
}
|
||||
}
|
||||
|
||||
if r.SearchAfter != nil && r.SearchBefore != nil {
|
||||
return fmt.Errorf("cannot use search after and search before together")
|
||||
}
|
||||
|
||||
if r.SearchAfter != nil {
|
||||
if r.From != 0 {
|
||||
return fmt.Errorf("cannot use search after with from !=0")
|
||||
}
|
||||
if len(r.SearchAfter) != len(r.Sort) {
|
||||
return fmt.Errorf("search after must have same size as sort order")
|
||||
}
|
||||
}
|
||||
if r.SearchBefore != nil {
|
||||
if r.From != 0 {
|
||||
return fmt.Errorf("cannot use search before with from !=0")
|
||||
}
|
||||
if len(r.SearchBefore) != len(r.Sort) {
|
||||
return fmt.Errorf("search before must have same size as sort order")
|
||||
}
|
||||
}
|
||||
|
||||
return r.Facets.Validate()
|
||||
}
|
||||
|
||||
|
@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
|
|||
r.Sort = order
|
||||
}
|
||||
|
||||
// SetSearchAfter sets the request to skip over hits with a sort
|
||||
// value less than the provided sort after key
|
||||
func (r *SearchRequest) SetSearchAfter(after []string) {
|
||||
r.SearchAfter = after
|
||||
}
|
||||
|
||||
// SetSearchBefore sets the request to skip over hits with a sort
|
||||
// value greater than the provided sort before key
|
||||
func (r *SearchRequest) SetSearchBefore(before []string) {
|
||||
r.SearchBefore = before
|
||||
}
|
||||
|
||||
// UnmarshalJSON deserializes a JSON representation of
|
||||
// a SearchRequest
|
||||
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
||||
|
@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
|||
Sort []json.RawMessage `json:"sort"`
|
||||
IncludeLocations bool `json:"includeLocations"`
|
||||
Score string `json:"score"`
|
||||
SearchAfter []string `json:"search_after"`
|
||||
SearchBefore []string `json:"search_before"`
|
||||
}
|
||||
|
||||
err := json.Unmarshal(input, &temp)
|
||||
|
@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
|||
r.Facets = temp.Facets
|
||||
r.IncludeLocations = temp.IncludeLocations
|
||||
r.Score = temp.Score
|
||||
r.SearchAfter = temp.SearchAfter
|
||||
r.SearchBefore = temp.SearchBefore
|
||||
r.Query, err = query.ParseQuery(temp.Q)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
|
@ -69,6 +69,7 @@ type TopNCollector struct {
|
|||
lowestMatchOutsideResults *search.DocumentMatch
|
||||
updateFieldVisitor index.DocumentFieldTermVisitor
|
||||
dvReader index.DocValueReader
|
||||
searchAfter *search.DocumentMatch
|
||||
}
|
||||
|
||||
// CheckDoneEvery controls how frequently we check the context deadline
|
||||
|
@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024)
|
|||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
|
||||
return newTopNCollector(size, skip, sort)
|
||||
}
|
||||
|
||||
// NewTopNCollector builds a collector to find the top 'size' hits
|
||||
// skipping over the first 'skip' hits
|
||||
// ordering hits by the provided sort order
|
||||
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
|
||||
rv := newTopNCollector(size, 0, sort)
|
||||
rv.searchAfter = &search.DocumentMatch{
|
||||
Sort: after,
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
|
||||
hc := &TopNCollector{size: size, skip: skip, sort: sort}
|
||||
|
||||
// pre-allocate space on the store to avoid reslicing
|
||||
|
@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
|
|||
searchContext := &search.SearchContext{
|
||||
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
|
||||
Collector: hc,
|
||||
IndexReader: reader,
|
||||
}
|
||||
|
||||
hc.dvReader, err = reader.DocValueReader(hc.neededFields)
|
||||
|
@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler(
|
|||
if d == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// support search after based pagination,
|
||||
// if this hit is <= the search after sort key
|
||||
// we should skip it
|
||||
if hc.searchAfter != nil {
|
||||
// exact sort order matches use hit number to break tie
|
||||
// but we want to allow for exact match, so we pretend the key has this hit's number
|
||||
hc.searchAfter.HitNumber = d.HitNumber
|
||||
if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// optimization, we track lowest sorting hit already removed from heap
|
||||
// with this one comparison, we can avoid all heap operations if
|
||||
// this hit would have been added and then immediately removed
|
||||
|
|
|
@ -41,6 +41,14 @@ type BleveQueryTime struct {
|
|||
time.Time
|
||||
}
|
||||
|
||||
// MinRFC3339CompatibleTime and MaxRFC3339CompatibleTime bound the instants
// accepted as date range endpoints when QueryDateTimeFormat is time.RFC3339.
// Endpoints outside this window are rejected as an overflow before they are
// converted with UnixNano.
//
// They are initialised directly with time.Date rather than by parsing
// strings in init(), so there is no parse error to discard and the values are
// set before any init function in the package runs.
var (
	// MinRFC3339CompatibleTime is 1677-12-01T00:00:00Z.
	MinRFC3339CompatibleTime = time.Date(1677, time.December, 1, 0, 0, 0, 0, time.UTC)
	// MaxRFC3339CompatibleTime is 2262-04-11T11:59:59Z.
	MaxRFC3339CompatibleTime = time.Date(2262, time.April, 11, 11, 59, 59, 0, time.UTC)
)
|
||||
|
||||
func queryTimeFromString(t string) (time.Time, error) {
|
||||
dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
|
||||
if err != nil {
|
||||
|
@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
|
|||
min := math.Inf(-1)
|
||||
max := math.Inf(1)
|
||||
if !q.Start.IsZero() {
|
||||
min = numeric.Int64ToFloat64(q.Start.UnixNano())
|
||||
if !isDatetimeCompatible(q.Start) {
|
||||
// overflow
|
||||
return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
|
||||
}
|
||||
startInt64 := q.Start.UnixNano()
|
||||
min = numeric.Int64ToFloat64(startInt64)
|
||||
}
|
||||
if !q.End.IsZero() {
|
||||
max = numeric.Int64ToFloat64(q.End.UnixNano())
|
||||
if !isDatetimeCompatible(q.End) {
|
||||
// overflow
|
||||
return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
|
||||
}
|
||||
endInt64 := q.End.UnixNano()
|
||||
max = numeric.Int64ToFloat64(endInt64)
|
||||
}
|
||||
|
||||
return &min, &max, nil
|
||||
|
@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error {
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isDatetimeCompatible(t BleveQueryTime) bool {
|
||||
if QueryDateTimeFormat == time.RFC3339 &&
|
||||
(t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
|
|
@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
|
|||
|
||||
if len(ss) < 1 {
|
||||
return searcher.NewMatchNoneSearcher(i)
|
||||
} else if len(ss) == 1 && int(q.Min) == ss[0].Min() {
|
||||
// apply optimization only if both conditions below are satisfied:
|
||||
// - disjunction searcher has only 1 child searcher
|
||||
// - parent searcher's min setting is equal to child searcher's min
|
||||
|
||||
return ss[0], nil
|
||||
}
|
||||
|
||||
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)
|
||||
|
|
94
vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go
generated
vendored
Normal file
94
vendor/github.com/blevesearch/bleve/search/query/geo_boundingpolygon.go
generated
vendored
Normal file
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package query
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/geo"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/mapping"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/searcher"
|
||||
)
|
||||
|
||||
type GeoBoundingPolygonQuery struct {
|
||||
Points []geo.Point `json:"polygon_points"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
}
|
||||
|
||||
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
|
||||
return &GeoBoundingPolygonQuery{
|
||||
Points: points}
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
|
||||
boost := Boost(b)
|
||||
q.BoostVal = &boost
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) Boost() float64 {
|
||||
return q.BoostVal.Value()
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) SetField(f string) {
|
||||
q.FieldVal = f
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) Field() string {
|
||||
return q.FieldVal
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader,
|
||||
m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
|
||||
field := q.FieldVal
|
||||
if q.FieldVal == "" {
|
||||
field = m.DefaultSearchField()
|
||||
}
|
||||
|
||||
return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options)
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) Validate() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
|
||||
tmp := struct {
|
||||
Points []interface{} `json:"polygon_points"`
|
||||
FieldVal string `json:"field,omitempty"`
|
||||
BoostVal *Boost `json:"boost,omitempty"`
|
||||
}{}
|
||||
err := json.Unmarshal(data, &tmp)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
q.Points = make([]geo.Point, 0, len(tmp.Points))
|
||||
for _, i := range tmp.Points {
|
||||
// now use our generic point parsing code from the geo package
|
||||
lon, lat, found := geo.ExtractGeoPoint(i)
|
||||
if !found {
|
||||
return fmt.Errorf("geo polygon point: %v is not in a valid format", i)
|
||||
}
|
||||
q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat})
|
||||
}
|
||||
|
||||
q.FieldVal = tmp.FieldVal
|
||||
q.BoostVal = tmp.BoostVal
|
||||
return nil
|
||||
}
|
|
@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) {
|
|||
}
|
||||
return &rv, nil
|
||||
}
|
||||
_, hasPoints := tmp["polygon_points"]
|
||||
if hasPoints {
|
||||
var rv GeoBoundingPolygonQuery
|
||||
err := json.Unmarshal(input, &rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown query type")
|
||||
}
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ type TermQueryScorer struct {
|
|||
idf float64
|
||||
options search.SearcherOptions
|
||||
idfExplanation *search.Explanation
|
||||
includeScore bool
|
||||
queryNorm float64
|
||||
queryWeight float64
|
||||
queryWeightExplanation *search.Explanation
|
||||
|
@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int {
|
|||
|
||||
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
|
||||
rv := TermQueryScorer{
|
||||
queryTerm: string(queryTerm),
|
||||
queryField: queryField,
|
||||
queryBoost: queryBoost,
|
||||
docTerm: docTerm,
|
||||
docTotal: docTotal,
|
||||
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
|
||||
options: options,
|
||||
queryWeight: 1.0,
|
||||
queryTerm: string(queryTerm),
|
||||
queryField: queryField,
|
||||
queryBoost: queryBoost,
|
||||
docTerm: docTerm,
|
||||
docTotal: docTotal,
|
||||
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
|
||||
options: options,
|
||||
queryWeight: 1.0,
|
||||
includeScore: options.Score != "none",
|
||||
}
|
||||
|
||||
if options.Explain {
|
||||
|
@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
|
|||
}
|
||||
|
||||
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
|
||||
var scoreExplanation *search.Explanation
|
||||
|
||||
// need to compute score
|
||||
var tf float64
|
||||
if termMatch.Freq < MaxSqrtCache {
|
||||
tf = SqrtCache[int(termMatch.Freq)]
|
||||
} else {
|
||||
tf = math.Sqrt(float64(termMatch.Freq))
|
||||
}
|
||||
score := tf * termMatch.Norm * s.idf
|
||||
|
||||
if s.options.Explain {
|
||||
childrenExplanations := make([]*search.Explanation, 3)
|
||||
childrenExplanations[0] = &search.Explanation{
|
||||
Value: tf,
|
||||
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
|
||||
rv := ctx.DocumentMatchPool.Get()
|
||||
// perform any score computations only when needed
|
||||
if s.includeScore || s.options.Explain {
|
||||
var scoreExplanation *search.Explanation
|
||||
var tf float64
|
||||
if termMatch.Freq < MaxSqrtCache {
|
||||
tf = SqrtCache[int(termMatch.Freq)]
|
||||
} else {
|
||||
tf = math.Sqrt(float64(termMatch.Freq))
|
||||
}
|
||||
childrenExplanations[1] = &search.Explanation{
|
||||
Value: termMatch.Norm,
|
||||
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
|
||||
}
|
||||
childrenExplanations[2] = s.idfExplanation
|
||||
scoreExplanation = &search.Explanation{
|
||||
Value: score,
|
||||
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
|
||||
Children: childrenExplanations,
|
||||
}
|
||||
}
|
||||
score := tf * termMatch.Norm * s.idf
|
||||
|
||||
// if the query weight isn't 1, multiply
|
||||
if s.queryWeight != 1.0 {
|
||||
score = score * s.queryWeight
|
||||
if s.options.Explain {
|
||||
childExplanations := make([]*search.Explanation, 2)
|
||||
childExplanations[0] = s.queryWeightExplanation
|
||||
childExplanations[1] = scoreExplanation
|
||||
childrenExplanations := make([]*search.Explanation, 3)
|
||||
childrenExplanations[0] = &search.Explanation{
|
||||
Value: tf,
|
||||
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
|
||||
}
|
||||
childrenExplanations[1] = &search.Explanation{
|
||||
Value: termMatch.Norm,
|
||||
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
|
||||
}
|
||||
childrenExplanations[2] = s.idfExplanation
|
||||
scoreExplanation = &search.Explanation{
|
||||
Value: score,
|
||||
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
|
||||
Children: childExplanations,
|
||||
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
|
||||
Children: childrenExplanations,
|
||||
}
|
||||
}
|
||||
|
||||
// if the query weight isn't 1, multiply
|
||||
if s.queryWeight != 1.0 {
|
||||
score = score * s.queryWeight
|
||||
if s.options.Explain {
|
||||
childExplanations := make([]*search.Explanation, 2)
|
||||
childExplanations[0] = s.queryWeightExplanation
|
||||
childExplanations[1] = scoreExplanation
|
||||
scoreExplanation = &search.Explanation{
|
||||
Value: score,
|
||||
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
|
||||
Children: childExplanations,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if s.includeScore {
|
||||
rv.Score = score
|
||||
}
|
||||
|
||||
if s.options.Explain {
|
||||
rv.Expl = scoreExplanation
|
||||
}
|
||||
}
|
||||
|
||||
rv := ctx.DocumentMatchPool.Get()
|
||||
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
|
||||
rv.Score = score
|
||||
if s.options.Explain {
|
||||
rv.Expl = scoreExplanation
|
||||
}
|
||||
|
||||
if len(termMatch.Vectors) > 0 {
|
||||
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
|
||||
|
|
|
@ -17,6 +17,7 @@ package search
|
|||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"sort"
|
||||
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/size"
|
||||
|
@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func (ap ArrayPositions) Compare(other ArrayPositions) int {
|
||||
for i, p := range ap {
|
||||
if i >= len(other) {
|
||||
return 1
|
||||
}
|
||||
if p < other[i] {
|
||||
return -1
|
||||
}
|
||||
if p > other[i] {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
if len(ap) < len(other) {
|
||||
return -1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type Location struct {
|
||||
// Pos is the position of the term within the field, starting at 1
|
||||
Pos uint64 `json:"pos"`
|
||||
|
@ -68,6 +87,46 @@ func (l *Location) Size() int {
|
|||
|
||||
type Locations []*Location
|
||||
|
||||
func (p Locations) Len() int { return len(p) }
|
||||
func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
||||
|
||||
func (p Locations) Less(i, j int) bool {
|
||||
c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
|
||||
if c < 0 {
|
||||
return true
|
||||
}
|
||||
if c > 0 {
|
||||
return false
|
||||
}
|
||||
return p[i].Pos < p[j].Pos
|
||||
}
|
||||
|
||||
func (p Locations) Dedupe() Locations { // destructive!
|
||||
if len(p) <= 1 {
|
||||
return p
|
||||
}
|
||||
|
||||
sort.Sort(p)
|
||||
|
||||
slow := 0
|
||||
|
||||
for _, pfast := range p {
|
||||
pslow := p[slow]
|
||||
if pslow.Pos == pfast.Pos &&
|
||||
pslow.Start == pfast.Start &&
|
||||
pslow.End == pfast.End &&
|
||||
pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
|
||||
continue // duplicate, so only move fast ahead
|
||||
}
|
||||
|
||||
slow++
|
||||
|
||||
p[slow] = pfast
|
||||
}
|
||||
|
||||
return p[:slow+1]
|
||||
}
|
||||
|
||||
type TermLocationMap map[string]Locations
|
||||
|
||||
func (t TermLocationMap) AddLocation(term string, location *Location) {
|
||||
|
@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
|
|||
|
||||
var lastField string
|
||||
var tlm TermLocationMap
|
||||
var needsDedupe bool
|
||||
|
||||
for i, ftl := range dm.FieldTermLocations {
|
||||
if lastField != ftl.Field {
|
||||
|
@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
|
|||
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
|
||||
}
|
||||
|
||||
tlm[ftl.Term] = append(tlm[ftl.Term], loc)
|
||||
locs := tlm[ftl.Term]
|
||||
|
||||
// if the loc is before or at the last location, then there
|
||||
// might be duplicates that need to be deduplicated
|
||||
if !needsDedupe && len(locs) > 0 {
|
||||
last := locs[len(locs)-1]
|
||||
cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
|
||||
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
|
||||
needsDedupe = true
|
||||
}
|
||||
}
|
||||
|
||||
tlm[ftl.Term] = append(locs, loc)
|
||||
|
||||
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
|
||||
Location: Location{
|
||||
|
@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
|
|||
},
|
||||
}
|
||||
}
|
||||
|
||||
if needsDedupe {
|
||||
for _, tlm := range dm.Locations {
|
||||
for term, locs := range tlm {
|
||||
tlm[term] = locs.Dedupe()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
|
||||
|
@ -279,6 +359,7 @@ type SearcherOptions struct {
|
|||
type SearchContext struct {
|
||||
DocumentMatchPool *DocumentMatchPool
|
||||
Collector Collector
|
||||
IndexReader index.IndexReader
|
||||
}
|
||||
|
||||
func (sc *SearchContext) Size() int {
|
||||
|
|
|
@ -45,6 +45,7 @@ type BooleanSearcher struct {
|
|||
scorer *scorer.ConjunctionQueryScorer
|
||||
matches []*search.DocumentMatch
|
||||
initialized bool
|
||||
done bool
|
||||
}
|
||||
|
||||
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
|
||||
|
@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
|
|||
|
||||
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
|
||||
|
||||
if s.done {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if !s.initialized {
|
||||
err := s.initSearchers(ctx)
|
||||
if err != nil {
|
||||
|
@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch
|
|||
}
|
||||
}
|
||||
|
||||
if rv == nil {
|
||||
s.done = true
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
|
||||
|
||||
if s.done {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if !s.initialized {
|
||||
err := s.initSearchers(ctx)
|
||||
if err != nil {
|
||||
|
@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
|
|||
}
|
||||
}
|
||||
|
||||
// Advance the searchers only if the currentID cursor is trailing the lookup ID,
|
||||
// additionally if the mustNotSearcher has been initialized, ensure that the
|
||||
// cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by
|
||||
// currentID) is trailing the lookup ID as well - for in the case where currentID
|
||||
// is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT
|
||||
// advance the currentID or the currMustNot cursors.
|
||||
if (s.currentID == nil || s.currentID.Compare(ID) < 0) &&
|
||||
(s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) {
|
||||
// Advance the searcher only if the cursor is trailing the lookup ID
|
||||
if s.currentID == nil || s.currentID.Compare(ID) < 0 {
|
||||
var err error
|
||||
if s.mustSearcher != nil {
|
||||
if s.currMust != nil {
|
||||
|
@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
|
|||
}
|
||||
|
||||
if s.mustNotSearcher != nil {
|
||||
if s.currMustNot != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currMustNot)
|
||||
}
|
||||
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// Additional check for mustNotSearcher, whose cursor isn't tracked by
|
||||
// currentID to prevent it from moving when the searcher's tracked
|
||||
// position is already ahead of or at the requested ID.
|
||||
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
|
||||
if s.currMustNot != nil {
|
||||
ctx.DocumentMatchPool.Put(s.currMustNot)
|
||||
}
|
||||
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,11 @@ import (
|
|||
"github.com/blevesearch/bleve/search"
|
||||
)
|
||||
|
||||
type filterFunc func(key []byte) bool
|
||||
|
||||
var GeoBitsShift1 = (geo.GeoBits << 1)
|
||||
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
|
||||
|
||||
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
|
||||
maxLon, maxLat float64, field string, boost float64,
|
||||
options search.SearcherOptions, checkBoundaries bool) (
|
||||
|
@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
|
|||
}
|
||||
|
||||
// do math to produce list of terms needed for this search
|
||||
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
|
||||
minLon, minLat, maxLon, maxLat, checkBoundaries)
|
||||
onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1,
|
||||
minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var onBoundarySearcher search.Searcher
|
||||
dvReader, err := indexReader.DocValueReader([]string{field})
|
||||
|
@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4
|
|||
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
|
||||
|
||||
func ComputeGeoRange(term uint64, shift uint,
|
||||
sminLon, sminLat, smaxLon, smaxLat float64,
|
||||
checkBoundaries bool) (
|
||||
onBoundary [][]byte, notOnBoundary [][]byte) {
|
||||
split := term | uint64(0x1)<<shift
|
||||
var upperMax uint64
|
||||
if shift < 63 {
|
||||
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
|
||||
} else {
|
||||
upperMax = 0xffffffffffffffff
|
||||
}
|
||||
lowerMax := split - 1
|
||||
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
|
||||
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
|
||||
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
|
||||
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
|
||||
onBoundary = append(onBoundary, plusOnBoundary...)
|
||||
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
|
||||
return
|
||||
}
|
||||
sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
|
||||
indexReader index.IndexReader, field string) (
|
||||
onBoundary [][]byte, notOnBoundary [][]byte, err error) {
|
||||
preallocBytesLen := 32
|
||||
preallocBytes := make([]byte, preallocBytesLen)
|
||||
|
||||
func relateAndRecurse(start, end uint64, res uint,
|
||||
sminLon, sminLat, smaxLon, smaxLat float64,
|
||||
checkBoundaries bool) (
|
||||
onBoundary [][]byte, notOnBoundary [][]byte) {
|
||||
minLon := geo.MortonUnhashLon(start)
|
||||
minLat := geo.MortonUnhashLat(start)
|
||||
maxLon := geo.MortonUnhashLon(end)
|
||||
maxLat := geo.MortonUnhashLat(end)
|
||||
|
||||
level := ((geo.GeoBits << 1) - res) >> 1
|
||||
|
||||
within := res%document.GeoPrecisionStep == 0 &&
|
||||
geo.RectWithin(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat)
|
||||
if within || (level == geoDetailLevel &&
|
||||
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat)) {
|
||||
if !within && checkBoundaries {
|
||||
return [][]byte{
|
||||
numeric.MustNewPrefixCodedInt64(int64(start), res),
|
||||
}, nil
|
||||
makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
|
||||
if len(preallocBytes) <= 0 {
|
||||
preallocBytesLen = preallocBytesLen * 2
|
||||
preallocBytes = make([]byte, preallocBytesLen)
|
||||
}
|
||||
return nil,
|
||||
[][]byte{
|
||||
numeric.MustNewPrefixCodedInt64(int64(start), res),
|
||||
}
|
||||
} else if level < geoDetailLevel &&
|
||||
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat) {
|
||||
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
|
||||
checkBoundaries)
|
||||
|
||||
rv, preallocBytes, err =
|
||||
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
|
||||
|
||||
return rv
|
||||
}
|
||||
return nil, nil
|
||||
|
||||
var fieldDict index.FieldDictContains
|
||||
var isIndexed filterFunc
|
||||
if irr, ok := indexReader.(index.IndexReaderContains); ok {
|
||||
fieldDict, err = irr.FieldDictContains(field)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
isIndexed = func(term []byte) bool {
|
||||
found, err := fieldDict.Contains(term)
|
||||
return err == nil && found
|
||||
}
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if fieldDict != nil {
|
||||
if fd, ok := fieldDict.(index.FieldDict); ok {
|
||||
cerr := fd.Close()
|
||||
if cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if isIndexed == nil {
|
||||
isIndexed = func(term []byte) bool {
|
||||
if indexReader != nil {
|
||||
reader, err := indexReader.TermFieldReader(term, field, false, false, false)
|
||||
if err != nil || reader == nil {
|
||||
return false
|
||||
}
|
||||
if reader.Count() == 0 {
|
||||
_ = reader.Close()
|
||||
return false
|
||||
}
|
||||
_ = reader.Close()
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
var computeGeoRange func(term uint64, shift uint) // declare for recursion
|
||||
|
||||
relateAndRecurse := func(start, end uint64, res, level uint) {
|
||||
minLon := geo.MortonUnhashLon(start)
|
||||
minLat := geo.MortonUnhashLat(start)
|
||||
maxLon := geo.MortonUnhashLon(end)
|
||||
maxLat := geo.MortonUnhashLat(end)
|
||||
|
||||
within := res%document.GeoPrecisionStep == 0 &&
|
||||
geo.RectWithin(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat)
|
||||
if within || (level == geoDetailLevel &&
|
||||
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat)) {
|
||||
codedTerm := makePrefixCoded(int64(start), res)
|
||||
if isIndexed(codedTerm) {
|
||||
if !within && checkBoundaries {
|
||||
onBoundary = append(onBoundary, codedTerm)
|
||||
} else {
|
||||
notOnBoundary = append(notOnBoundary, codedTerm)
|
||||
}
|
||||
}
|
||||
} else if level < geoDetailLevel &&
|
||||
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
|
||||
sminLon, sminLat, smaxLon, smaxLat) {
|
||||
computeGeoRange(start, res-1)
|
||||
}
|
||||
}
|
||||
|
||||
computeGeoRange = func(term uint64, shift uint) {
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
split := term | uint64(0x1)<<shift
|
||||
var upperMax uint64
|
||||
if shift < 63 {
|
||||
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
|
||||
} else {
|
||||
upperMax = 0xffffffffffffffff
|
||||
}
|
||||
|
||||
lowerMax := split - 1
|
||||
|
||||
level := (GeoBitsShift1 - shift) >> 1
|
||||
|
||||
relateAndRecurse(term, lowerMax, shift, level)
|
||||
relateAndRecurse(split, upperMax, shift, level)
|
||||
}
|
||||
|
||||
computeGeoRange(term, shift)
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return onBoundary, notOnBoundary, err
|
||||
}
|
||||
|
||||
func buildRectFilter(dvReader index.DocValueReader, field string,
|
||||
|
|
11
vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
generated
vendored
11
vendor/github.com/blevesearch/bleve/search/searcher/search_geopointdistance.go
generated
vendored
|
@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
|
|||
// build a searcher for the box
|
||||
boxSearcher, err := boxSearcher(indexReader,
|
||||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
|
||||
field, boost, options)
|
||||
field, boost, options, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
|
|||
// two boxes joined through a disjunction searcher
|
||||
func boxSearcher(indexReader index.IndexReader,
|
||||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
|
||||
field string, boost float64, options search.SearcherOptions) (
|
||||
field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
|
||||
search.Searcher, error) {
|
||||
if bottomRightLon < topLeftLon {
|
||||
// cross date line, rewrite as two parts
|
||||
|
||||
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
|
||||
-180, bottomRightLat, bottomRightLon, topLeftLat,
|
||||
field, boost, options, false)
|
||||
field, boost, options, checkBoundaries)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
|
||||
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false)
|
||||
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
|
||||
checkBoundaries)
|
||||
if err != nil {
|
||||
_ = leftSearcher.Close()
|
||||
return nil, err
|
||||
|
@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader,
|
|||
// build geoboundinggox searcher for that bounding box
|
||||
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
|
||||
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
|
||||
options, false)
|
||||
options, checkBoundaries)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
110
vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
generated
vendored
Normal file
110
vendor/github.com/blevesearch/bleve/search/searcher/search_geopolygon.go
generated
vendored
Normal file
|
@ -0,0 +1,110 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package searcher
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/geo"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"math"
|
||||
)
|
||||
|
||||
func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
|
||||
polygon []geo.Point, field string, boost float64,
|
||||
options search.SearcherOptions) (search.Searcher, error) {
|
||||
|
||||
// compute the bounding box enclosing the polygon
|
||||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
|
||||
geo.BoundingRectangleForPolygon(polygon)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// build a searcher for the bounding box on the polygon
|
||||
boxSearcher, err := boxSearcher(indexReader,
|
||||
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
|
||||
field, boost, options, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
dvReader, err := indexReader.DocValueReader([]string{field})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// wrap it in a filtering searcher that checks for the polygon inclusivity
|
||||
return NewFilteringSearcher(boxSearcher,
|
||||
buildPolygonFilter(dvReader, field, polygon)), nil
|
||||
}
|
||||
|
||||
// float64EqualityThreshold is the largest absolute difference at which
// almostEqual still treats two values as equal.
const float64EqualityThreshold = 1e-6

// almostEqual reports whether a and b differ by no more than
// float64EqualityThreshold.
func almostEqual(a, b float64) bool {
	delta := math.Abs(a - b)
	return delta <= float64EqualityThreshold
}
|
||||
|
||||
// buildPolygonFilter returns true if the point lies inside the
|
||||
// polygon. It is based on the ray-casting technique as referred
|
||||
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
|
||||
func buildPolygonFilter(dvReader index.DocValueReader, field string,
|
||||
polygon []geo.Point) FilterFunc {
|
||||
return func(d *search.DocumentMatch) bool {
|
||||
var lon, lat float64
|
||||
var found bool
|
||||
|
||||
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
|
||||
// only consider the values which are shifted 0
|
||||
prefixCoded := numeric.PrefixCoded(term)
|
||||
shift, err := prefixCoded.Shift()
|
||||
if err == nil && shift == 0 {
|
||||
i64, err := prefixCoded.Int64()
|
||||
if err == nil {
|
||||
lon = geo.MortonUnhashLon(uint64(i64))
|
||||
lat = geo.MortonUnhashLat(uint64(i64))
|
||||
found = true
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
// Note: this approach works for points which are strictly inside
|
||||
// the polygon. ie it might fail for certain points on the polygon boundaries.
|
||||
if err == nil && found {
|
||||
nVertices := len(polygon)
|
||||
var inside bool
|
||||
// check for a direct vertex match
|
||||
if almostEqual(polygon[0].Lat, lat) &&
|
||||
almostEqual(polygon[0].Lon, lon) {
|
||||
return true
|
||||
}
|
||||
|
||||
for i := 1; i < nVertices; i++ {
|
||||
if almostEqual(polygon[i].Lat, lat) &&
|
||||
almostEqual(polygon[i].Lon, lon) {
|
||||
return true
|
||||
}
|
||||
if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) &&
|
||||
lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/
|
||||
(polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon {
|
||||
inside = !inside
|
||||
}
|
||||
}
|
||||
return inside
|
||||
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
|
@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
|
|||
if !*inclusiveMax && maxInt64 != math.MinInt64 {
|
||||
maxInt64--
|
||||
}
|
||||
|
||||
var fieldDict index.FieldDictContains
|
||||
var isIndexed filterFunc
|
||||
var err error
|
||||
if irr, ok := indexReader.(index.IndexReaderContains); ok {
|
||||
fieldDict, err = irr.FieldDictContains(field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
isIndexed = func(term []byte) bool {
|
||||
found, err := fieldDict.Contains(term)
|
||||
return err == nil && found
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME hard-coded precision, should match field declaration
|
||||
termRanges := splitInt64Range(minInt64, maxInt64, 4)
|
||||
terms := termRanges.Enumerate()
|
||||
terms := termRanges.Enumerate(isIndexed)
|
||||
if fieldDict != nil {
|
||||
if fd, ok := fieldDict.(index.FieldDict); ok {
|
||||
cerr := fd.Close()
|
||||
if cerr != nil {
|
||||
err = cerr
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(terms) < 1 {
|
||||
// cannot return MatchNoneSearcher because of interaction with
|
||||
// commit f391b991c20f02681bacd197afc6d8aed444e132
|
||||
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
|
||||
true)
|
||||
}
|
||||
var err error
|
||||
terms, err = filterCandidateTerms(indexReader, terms, field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
// for upside_down
|
||||
if isIndexed == nil {
|
||||
terms, err = filterCandidateTerms(indexReader, terms, field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if tooManyClauses(len(terms)) {
|
||||
return nil, tooManyClausesErr(len(terms))
|
||||
}
|
||||
|
@ -125,11 +154,17 @@ type termRange struct {
|
|||
endTerm []byte
|
||||
}
|
||||
|
||||
func (t *termRange) Enumerate() [][]byte {
|
||||
func (t *termRange) Enumerate(filter filterFunc) [][]byte {
|
||||
var rv [][]byte
|
||||
next := t.startTerm
|
||||
for bytes.Compare(next, t.endTerm) <= 0 {
|
||||
rv = append(rv, next)
|
||||
if filter != nil {
|
||||
if filter(next) {
|
||||
rv = append(rv, next)
|
||||
}
|
||||
} else {
|
||||
rv = append(rv, next)
|
||||
}
|
||||
next = incrementBytes(next)
|
||||
}
|
||||
return rv
|
||||
|
@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte {
|
|||
|
||||
type termRanges []*termRange
|
||||
|
||||
func (tr termRanges) Enumerate() [][]byte {
|
||||
func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
|
||||
var rv [][]byte
|
||||
for _, tri := range tr {
|
||||
trie := tri.Enumerate()
|
||||
trie := tri.Enumerate(filter)
|
||||
rv = append(rv, trie...)
|
||||
}
|
||||
return rv
|
||||
|
|
|
@ -38,6 +38,8 @@ type SearchSort interface {
|
|||
RequiresScoring() bool
|
||||
RequiresFields() []string
|
||||
|
||||
Reverse()
|
||||
|
||||
Copy() SearchSort
|
||||
}
|
||||
|
||||
|
@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool {
|
|||
return rv
|
||||
}
|
||||
|
||||
func (so SortOrder) Reverse() {
|
||||
for _, soi := range so {
|
||||
soi.Reverse()
|
||||
}
|
||||
}
|
||||
|
||||
// SortFieldType lets you control some internal sort behavior
|
||||
// normally leaving this to the zero-value of SortFieldAuto is fine
|
||||
type SortFieldType int
|
||||
|
@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort {
|
|||
return &rv
|
||||
}
|
||||
|
||||
func (s *SortField) Reverse() {
|
||||
s.Desc = !s.Desc
|
||||
if s.Missing == SortFieldMissingFirst {
|
||||
s.Missing = SortFieldMissingLast
|
||||
} else {
|
||||
s.Missing = SortFieldMissingFirst
|
||||
}
|
||||
}
|
||||
|
||||
// SortDocID will sort results by the document identifier
|
||||
type SortDocID struct {
|
||||
Desc bool
|
||||
|
@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort {
|
|||
return &rv
|
||||
}
|
||||
|
||||
func (s *SortDocID) Reverse() {
|
||||
s.Desc = !s.Desc
|
||||
}
|
||||
|
||||
// SortScore will sort results by the document match score
|
||||
type SortScore struct {
|
||||
Desc bool
|
||||
|
@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort {
|
|||
return &rv
|
||||
}
|
||||
|
||||
func (s *SortScore) Reverse() {
|
||||
s.Desc = !s.Desc
|
||||
}
|
||||
|
||||
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
|
||||
|
||||
// NewSortGeoDistance creates SearchSort instance for sorting documents by
|
||||
|
@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort {
|
|||
return &rv
|
||||
}
|
||||
|
||||
func (s *SortGeoDistance) Reverse() {
|
||||
s.Desc = !s.Desc
|
||||
}
|
||||
|
||||
type BytesSlice [][]byte
|
||||
|
||||
func (p BytesSlice) Len() int { return len(p) }
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- 1.4
|
||||
- 1.7
|
||||
|
||||
script:
|
||||
- go get golang.org/x/tools/cmd/vet
|
||||
- go get golang.org/x/tools/cmd/cover
|
||||
- go get github.com/mattn/goveralls
|
||||
- go test -v -covermode=count -coverprofile=profile.out
|
||||
|
|
|
@ -18,7 +18,7 @@ import (
|
|||
"bytes"
|
||||
)
|
||||
|
||||
// Iterator represents a means of visity key/value pairs in order.
|
||||
// Iterator represents a means of visiting key/value pairs in order.
|
||||
type Iterator interface {
|
||||
|
||||
// Current() returns the key/value pair currently pointed to.
|
||||
|
@ -186,20 +186,29 @@ func (i *FSTIterator) Next() error {
|
|||
}
|
||||
|
||||
func (i *FSTIterator) next(lastOffset int) error {
|
||||
// remember where we started
|
||||
// remember where we started with keysStack in this next() call
|
||||
i.nextStart = append(i.nextStart[:0], i.keysStack...)
|
||||
|
||||
nextOffset := lastOffset + 1
|
||||
allowCompare := false
|
||||
|
||||
OUTER:
|
||||
for true {
|
||||
curr := i.statesStack[len(i.statesStack)-1]
|
||||
autCurr := i.autStatesStack[len(i.autStatesStack)-1]
|
||||
|
||||
if curr.Final() && i.aut.IsMatch(autCurr) &&
|
||||
bytes.Compare(i.keysStack, i.nextStart) > 0 {
|
||||
// in final state greater than start key
|
||||
return nil
|
||||
if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare {
|
||||
// check to see if new keystack might have gone too far
|
||||
if i.endKeyExclusive != nil &&
|
||||
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
|
||||
return ErrIteratorDone
|
||||
}
|
||||
|
||||
cmp := bytes.Compare(i.keysStack, i.nextStart)
|
||||
if cmp > 0 {
|
||||
// in final state greater than start key
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
numTrans := curr.NumTransitions()
|
||||
|
@ -207,8 +216,12 @@ OUTER:
|
|||
INNER:
|
||||
for nextOffset < numTrans {
|
||||
t := curr.TransitionAt(nextOffset)
|
||||
|
||||
autNext := i.aut.Accept(autCurr, t)
|
||||
if !i.aut.CanMatch(autNext) {
|
||||
// TODO: potential optimization to skip nextOffset
|
||||
// forwards more directly to something that the
|
||||
// automaton likes rather than a linear scan?
|
||||
nextOffset += 1
|
||||
continue INNER
|
||||
}
|
||||
|
@ -234,30 +247,41 @@ OUTER:
|
|||
i.valsStack = append(i.valsStack, v)
|
||||
i.autStatesStack = append(i.autStatesStack, autNext)
|
||||
|
||||
// check to see if new keystack might have gone too far
|
||||
if i.endKeyExclusive != nil &&
|
||||
bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 {
|
||||
return ErrIteratorDone
|
||||
}
|
||||
|
||||
nextOffset = 0
|
||||
allowCompare = true
|
||||
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
// no more transitions, so need to backtrack and stack pop
|
||||
if len(i.statesStack) <= 1 {
|
||||
// stack len is 1 (root), can't go back further, we're done
|
||||
break
|
||||
}
|
||||
|
||||
// no transitions, and still room to pop
|
||||
i.statesStack = i.statesStack[:len(i.statesStack)-1]
|
||||
i.keysStack = i.keysStack[:len(i.keysStack)-1]
|
||||
// if the top of the stack represents a linear chain of states
|
||||
// (i.e., a suffix of nodes linked by single transitions),
|
||||
// then optimize by popping the suffix in one shot without
|
||||
// going back all the way to the OUTER loop
|
||||
var popNum int
|
||||
for j := len(i.statesStack) - 1; j > 0; j-- {
|
||||
if i.statesStack[j].NumTransitions() != 1 {
|
||||
popNum = len(i.statesStack) - 1 - j
|
||||
break
|
||||
}
|
||||
}
|
||||
if popNum < 1 { // always pop at least 1 entry from the stacks
|
||||
popNum = 1
|
||||
}
|
||||
|
||||
nextOffset = i.keysPosStack[len(i.keysPosStack)-1] + 1
|
||||
nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1
|
||||
allowCompare = false
|
||||
|
||||
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-1]
|
||||
i.valsStack = i.valsStack[:len(i.valsStack)-1]
|
||||
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-1]
|
||||
i.statesStack = i.statesStack[:len(i.statesStack)-popNum]
|
||||
i.keysStack = i.keysStack[:len(i.keysStack)-popNum]
|
||||
i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum]
|
||||
i.valsStack = i.valsStack[:len(i.valsStack)-popNum]
|
||||
i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum]
|
||||
}
|
||||
|
||||
return ErrIteratorDone
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
module github.com/couchbase/vellum
|
||||
|
||||
go 1.12
|
||||
|
||||
require (
|
||||
github.com/edsrzf/mmap-go v1.0.0
|
||||
github.com/spf13/cobra v0.0.5
|
||||
github.com/willf/bitset v1.1.10
|
||||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a // indirect
|
||||
)
|
|
@ -0,0 +1,39 @@
|
|||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
|
||||
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
|
||||
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
|
||||
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
|
||||
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
|
||||
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
|
||||
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
|
||||
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
|
||||
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
|
||||
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
|
||||
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
|
||||
github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
|
||||
github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s=
|
||||
github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU=
|
||||
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
|
||||
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
|
||||
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
|
||||
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
|
||||
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
|
||||
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
|
||||
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a h1:aYOabOQFp6Vj6W1F80affTUvO9UxmJRx8K0gsfABByQ=
|
||||
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
|
@ -12,7 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
package levenshtein
|
||||
|
||||
import (
|
||||
"fmt"
|
|
@ -12,7 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
package levenshtein
|
||||
|
||||
import (
|
||||
"fmt"
|
|
@ -12,7 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
package levenshtein
|
||||
|
||||
import "fmt"
|
||||
|
|
@ -12,7 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
package levenshtein
|
||||
|
||||
import (
|
||||
"math"
|
|
@ -12,7 +12,7 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein2
|
||||
package levenshtein
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
|
@ -75,15 +75,23 @@ func (c *compiler) c(ast *syntax.Regexp) (err error) {
|
|||
Rune0: [2]rune{r, r},
|
||||
}
|
||||
next.Rune = next.Rune0[0:2]
|
||||
return c.c(&next)
|
||||
}
|
||||
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
|
||||
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, seq := range c.sequences {
|
||||
c.compileUtf8Ranges(seq)
|
||||
// try to find more folded runes
|
||||
for r1 := unicode.SimpleFold(r); r1 != r; r1 = unicode.SimpleFold(r1) {
|
||||
next.Rune = append(next.Rune, r1, r1)
|
||||
}
|
||||
err = c.c(&next)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
|
||||
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, seq := range c.sequences {
|
||||
c.compileUtf8Ranges(seq)
|
||||
}
|
||||
}
|
||||
}
|
||||
case syntax.OpAnyChar:
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
// +build riscv64
|
||||
|
||||
package bbolt
|
||||
|
||||
// maxMapSize represents the largest mmap size supported by Bolt.
|
||||
const maxMapSize = 0xFFFFFFFFFFFF // 256TB
|
||||
|
||||
// maxAllocSize is the size used when creating array pointers.
|
||||
const maxAllocSize = 0x7FFFFFFF
|
||||
|
||||
// Are unaligned load/stores broken on this arch?
|
||||
var brokenUnaligned = true
|
|
@ -121,6 +121,7 @@ type DB struct {
|
|||
AllocSize int
|
||||
|
||||
path string
|
||||
openFile func(string, int, os.FileMode) (*os.File, error)
|
||||
file *os.File
|
||||
dataref []byte // mmap'ed readonly, write throws SEGV
|
||||
data *[maxMapSize]byte
|
||||
|
@ -199,10 +200,15 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
|||
db.readOnly = true
|
||||
}
|
||||
|
||||
db.openFile = options.OpenFile
|
||||
if db.openFile == nil {
|
||||
db.openFile = os.OpenFile
|
||||
}
|
||||
|
||||
// Open data file and separate sync handler for metadata writes.
|
||||
db.path = path
|
||||
var err error
|
||||
if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
|
||||
if db.file, err = db.openFile(db.path, flag|os.O_CREATE, mode); err != nil {
|
||||
_ = db.close()
|
||||
return nil, err
|
||||
}
|
||||
|
@ -1054,6 +1060,10 @@ type Options struct {
|
|||
// set directly on the DB itself when returned from Open(), but this option
|
||||
// is useful in APIs which expose Options but not the underlying DB.
|
||||
NoSync bool
|
||||
|
||||
// OpenFile is used to open files. It defaults to os.OpenFile. This option
|
||||
// is useful for writing hermetic tests.
|
||||
OpenFile func(string, int, os.FileMode) (*os.File, error)
|
||||
}
|
||||
|
||||
// DefaultOptions represent the options used if nil options are passed into Open().
|
||||
|
|
|
@ -349,6 +349,28 @@ func (f *freelist) reload(p *page) {
|
|||
f.readIDs(a)
|
||||
}
|
||||
|
||||
// noSyncReload reads the freelist from pgids and filters out pending items.
|
||||
func (f *freelist) noSyncReload(pgids []pgid) {
|
||||
// Build a cache of only pending pages.
|
||||
pcache := make(map[pgid]bool)
|
||||
for _, txp := range f.pending {
|
||||
for _, pendingID := range txp.ids {
|
||||
pcache[pendingID] = true
|
||||
}
|
||||
}
|
||||
|
||||
// Check each page in the freelist and build a new available freelist
|
||||
// with any pages not in the pending lists.
|
||||
var a []pgid
|
||||
for _, id := range pgids {
|
||||
if !pcache[id] {
|
||||
a = append(a, id)
|
||||
}
|
||||
}
|
||||
|
||||
f.readIDs(a)
|
||||
}
|
||||
|
||||
// reindex rebuilds the free cache based on available and pending free lists.
|
||||
func (f *freelist) reindex() {
|
||||
ids := f.getFreePageIDs()
|
||||
|
|
|
@ -254,17 +254,36 @@ func (tx *Tx) Rollback() error {
|
|||
if tx.db == nil {
|
||||
return ErrTxClosed
|
||||
}
|
||||
tx.rollback()
|
||||
tx.nonPhysicalRollback()
|
||||
return nil
|
||||
}
|
||||
|
||||
// nonPhysicalRollback is called when the user calls Rollback directly; in this case we do not need to reload the free pages from disk.
|
||||
func (tx *Tx) nonPhysicalRollback() {
|
||||
if tx.db == nil {
|
||||
return
|
||||
}
|
||||
if tx.writable {
|
||||
tx.db.freelist.rollback(tx.meta.txid)
|
||||
}
|
||||
tx.close()
|
||||
}
|
||||
|
||||
// rollback needs to reload the free pages from disk in case some system error happens like fsync error.
|
||||
func (tx *Tx) rollback() {
|
||||
if tx.db == nil {
|
||||
return
|
||||
}
|
||||
if tx.writable {
|
||||
tx.db.freelist.rollback(tx.meta.txid)
|
||||
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
|
||||
if !tx.db.hasSyncedFreelist() {
|
||||
// Reconstruct free page list by scanning the DB to get the whole free page list.
|
||||
// Note: scanning the whole db is heavy if your db size is large in NoSyncFreeList mode.
|
||||
tx.db.freelist.noSyncReload(tx.db.freepages())
|
||||
} else {
|
||||
// Read free page list from freelist page.
|
||||
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
|
||||
}
|
||||
}
|
||||
tx.close()
|
||||
}
|
||||
|
@ -315,7 +334,7 @@ func (tx *Tx) Copy(w io.Writer) error {
|
|||
// If err == nil then exactly tx.Size() bytes will be written into the writer.
|
||||
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
||||
// Attempt to open reader with WriteFlag
|
||||
f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
|
||||
f, err := tx.db.openFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
@ -369,7 +388,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
|||
// A reader transaction is maintained during the copy so it is safe to continue
|
||||
// using the database while a copy is in progress.
|
||||
func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
|
||||
f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
|
||||
f, err := tx.db.openFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
The MIT license.
|
||||
|
||||
Copyright (c) 2014 the go-unsnap-stream authors.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
|
@ -7,6 +9,9 @@ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
|
@ -14,5 +19,3 @@ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Permission is explicitly granted to relicense this material under new terms of
|
||||
your choice when integrating this library with another library or project.
|
||||
|
|
|
@ -7,7 +7,9 @@ Note that the *streaming or framing format* for snappy is different from snappy
|
|||
|
||||
Strangely, though the streaming format was first proposed in Go[1][2], it was never updated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4].
|
||||
|
||||
For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatiblity, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C.
|
||||
Update to the previous paragraph: Hooray! Good news: Thanks to @nigeltao, we have since learned that the [github.com/golang/snappy](https://github.com/golang/snappy) package now provides the snappy streaming format too. Even though the type level descriptions are a little misleading because they don't mention that they are for the stream format, the [snappy package header documentation](https://godoc.org/github.com/golang/snappy) points out that the [snappy.Reader](https://godoc.org/github.com/golang/snappy#Reader) and [snappy.Writer](https://godoc.org/github.com/golang/snappy#Writer) types do indeed provide stream (vs block) handling. Although I have not benchmarked, you should probably prefer that package as it will likely be maintained more than I have time to devote, and also perhaps better integrated with the underlying snappy as they share the same repo.
|
||||
|
||||
For binary compatibility with the [python implementation](https://pypi.python.org/pypi/python-snappy) in [4], one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C.
|
||||
|
||||
However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead.
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"hash/crc32"
|
||||
|
||||
|
@ -189,7 +190,12 @@ func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedS
|
|||
err = nil
|
||||
}
|
||||
} else {
|
||||
panic(err)
|
||||
// may be an odd already closed... don't panic on that
|
||||
if strings.Contains(err.Error(), "file already closed") {
|
||||
err = nil
|
||||
} else {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@ import (
|
|||
"reflect"
|
||||
)
|
||||
|
||||
const resumableDefault = false
|
||||
|
||||
var (
|
||||
// ErrShortBytes is returned when the
|
||||
// slice being decoded is too short to
|
||||
|
@ -26,99 +28,240 @@ type Error interface {
|
|||
// Resumable returns whether
|
||||
// or not the error means that
|
||||
// the stream of data is malformed
|
||||
// and the information is unrecoverable.
|
||||
// and the information is unrecoverable.
|
||||
Resumable() bool
|
||||
}
|
||||
|
||||
// contextError allows msgp Error instances to be enhanced with additional
|
||||
// context about their origin.
|
||||
type contextError interface {
|
||||
Error
|
||||
|
||||
// withContext must not modify the error instance - it must clone and
|
||||
// return a new error with the context added.
|
||||
withContext(ctx string) error
|
||||
}
|
||||
|
||||
// Cause returns the underlying cause of an error that has been wrapped
|
||||
// with additional context.
|
||||
func Cause(e error) error {
|
||||
out := e
|
||||
if e, ok := e.(errWrapped); ok && e.cause != nil {
|
||||
out = e.cause
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Resumable returns whether decoding can continue after e, i.e. whether the
|
||||
// stream of data is still usable. Errors that do not implement Error are not resumable.
|
||||
func Resumable(e error) bool {
|
||||
if e, ok := e.(Error); ok {
|
||||
return e.Resumable()
|
||||
}
|
||||
return resumableDefault
|
||||
}
|
||||
|
||||
// WrapError wraps an error with additional context that allows the part of the
|
||||
// serialized type that caused the problem to be identified. Underlying errors
|
||||
// can be retrieved using Cause()
|
||||
//
|
||||
// The input error is not modified - a new error should be returned.
|
||||
//
|
||||
// ErrShortBytes is not wrapped with any context due to backward compatibility
|
||||
// issues with the public API.
|
||||
//
|
||||
func WrapError(err error, ctx ...interface{}) error {
|
||||
switch e := err.(type) {
|
||||
case errShort:
|
||||
return e
|
||||
case contextError:
|
||||
return e.withContext(ctxString(ctx))
|
||||
default:
|
||||
return errWrapped{cause: err, ctx: ctxString(ctx)}
|
||||
}
|
||||
}
|
||||
|
||||
// ctxString converts the incoming interface{} slice into a single string.
|
||||
func ctxString(ctx []interface{}) string {
|
||||
out := ""
|
||||
for idx, cv := range ctx {
|
||||
if idx > 0 {
|
||||
out += "/"
|
||||
}
|
||||
out += fmt.Sprintf("%v", cv)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// addCtx prepends add to the existing context path ctx, separating the two
// with "/". When ctx is empty, add is returned on its own.
func addCtx(ctx, add string) string {
	if ctx == "" {
		return add
	}
	return add + "/" + ctx
}
|
||||
|
||||
// errWrapped allows arbitrary errors passed to WrapError to be enhanced with
|
||||
// context and unwrapped with Cause()
|
||||
type errWrapped struct {
|
||||
cause error
|
||||
ctx string
|
||||
}
|
||||
|
||||
func (e errWrapped) Error() string {
|
||||
if e.ctx != "" {
|
||||
return fmt.Sprintf("%s at %s", e.cause, e.ctx)
|
||||
} else {
|
||||
return e.cause.Error()
|
||||
}
|
||||
}
|
||||
|
||||
func (e errWrapped) Resumable() bool {
|
||||
if e, ok := e.cause.(Error); ok {
|
||||
return e.Resumable()
|
||||
}
|
||||
return resumableDefault
|
||||
}
|
||||
|
||||
type errShort struct{}
|
||||
|
||||
func (e errShort) Error() string { return "msgp: too few bytes left to read object" }
|
||||
func (e errShort) Resumable() bool { return false }
|
||||
|
||||
type errFatal struct{}
|
||||
type errFatal struct {
|
||||
ctx string
|
||||
}
|
||||
|
||||
func (f errFatal) Error() string {
|
||||
out := "msgp: fatal decoding error (unreachable code)"
|
||||
if f.ctx != "" {
|
||||
out += " at " + f.ctx
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (f errFatal) Error() string { return "msgp: fatal decoding error (unreachable code)" }
|
||||
func (f errFatal) Resumable() bool { return false }
|
||||
|
||||
func (f errFatal) withContext(ctx string) error { f.ctx = addCtx(f.ctx, ctx); return f }
|
||||
|
||||
// ArrayError is an error returned
|
||||
// when decoding a fix-sized array
|
||||
// of the wrong size
|
||||
type ArrayError struct {
|
||||
Wanted uint32
|
||||
Got uint32
|
||||
ctx string
|
||||
}
|
||||
|
||||
// Error implements the error interface
|
||||
func (a ArrayError) Error() string {
|
||||
return fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got)
|
||||
out := fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got)
|
||||
if a.ctx != "" {
|
||||
out += " at " + a.ctx
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Resumable is always 'true' for ArrayErrors
|
||||
func (a ArrayError) Resumable() bool { return true }
|
||||
|
||||
func (a ArrayError) withContext(ctx string) error { a.ctx = addCtx(a.ctx, ctx); return a }
|
||||
|
||||
// IntOverflow is returned when a call
|
||||
// would downcast an integer to a type
|
||||
// with too few bits to hold its value.
|
||||
type IntOverflow struct {
|
||||
Value int64 // the value of the integer
|
||||
FailedBitsize int // the bit size that the int64 could not fit into
|
||||
ctx string
|
||||
}
|
||||
|
||||
// Error implements the error interface
|
||||
func (i IntOverflow) Error() string {
|
||||
return fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize)
|
||||
str := fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize)
|
||||
if i.ctx != "" {
|
||||
str += " at " + i.ctx
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
// Resumable is always 'true' for overflows
|
||||
func (i IntOverflow) Resumable() bool { return true }
|
||||
|
||||
func (i IntOverflow) withContext(ctx string) error { i.ctx = addCtx(i.ctx, ctx); return i }
|
||||
|
||||
// UintOverflow is returned when a call
|
||||
// would downcast an unsigned integer to a type
|
||||
// with too few bits to hold its value
|
||||
type UintOverflow struct {
|
||||
Value uint64 // value of the uint
|
||||
FailedBitsize int // the bit size that couldn't fit the value
|
||||
ctx string
|
||||
}
|
||||
|
||||
// Error implements the error interface
|
||||
func (u UintOverflow) Error() string {
|
||||
return fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize)
|
||||
str := fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize)
|
||||
if u.ctx != "" {
|
||||
str += " at " + u.ctx
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
// Resumable is always 'true' for overflows
|
||||
func (u UintOverflow) Resumable() bool { return true }
|
||||
|
||||
func (u UintOverflow) withContext(ctx string) error { u.ctx = addCtx(u.ctx, ctx); return u }
|
||||
|
||||
// UintBelowZero is returned when a call
|
||||
// would cast a signed integer below zero
|
||||
// to an unsigned integer.
|
||||
type UintBelowZero struct {
|
||||
Value int64 // value of the incoming int
|
||||
ctx string
|
||||
}
|
||||
|
||||
// Error implements the error interface
|
||||
func (u UintBelowZero) Error() string {
|
||||
return fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value)
|
||||
str := fmt.Sprintf("msgp: attempted to cast int %d to unsigned", u.Value)
|
||||
if u.ctx != "" {
|
||||
str += " at " + u.ctx
|
||||
}
|
||||
return str
|
||||
}
|
||||
|
||||
// Resumable is always 'true' for UintBelowZero
|
||||
func (u UintBelowZero) Resumable() bool { return true }
|
||||
|
||||
func (u UintBelowZero) withContext(ctx string) error {
|
||||
u.ctx = ctx
|
||||
return u
|
||||
}
|
||||
|
||||
// A TypeError is returned when a particular
|
||||
// decoding method is unsuitable for decoding
|
||||
// a particular MessagePack value.
|
||||
type TypeError struct {
|
||||
Method Type // Type expected by method
|
||||
Encoded Type // Type actually encoded
|
||||
|
||||
ctx string
|
||||
}
|
||||
|
||||
// Error implements the error interface
|
||||
func (t TypeError) Error() string {
|
||||
return fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method)
|
||||
out := fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method)
|
||||
if t.ctx != "" {
|
||||
out += " at " + t.ctx
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// Resumable returns 'true' for TypeErrors
|
||||
func (t TypeError) Resumable() bool { return true }
|
||||
|
||||
// withContext returns a copy of t whose context is the existing context
// combined with ctx by addCtx. t is a value receiver, so the caller's
// error is not modified.
func (t TypeError) withContext(ctx string) error {
	located := t
	located.ctx = addCtx(t.ctx, ctx)
	return located
}
|
||||
|
||||
// returns either InvalidPrefixError or
|
||||
// TypeError depending on whether or not
|
||||
// the prefix is recognized
|
||||
|
@ -148,10 +291,24 @@ func (i InvalidPrefixError) Resumable() bool { return false }
|
|||
// to a function that takes `interface{}`.
|
||||
type ErrUnsupportedType struct {
	T reflect.Type // the type that is not supported

	ctx string // optional context; appended to the message when non-empty
}

// Error implements error.
//
// The message quotes the unsupported type. When a context has been
// attached, it is appended as " at <ctx>".
func (e *ErrUnsupportedType) Error() string {
	out := fmt.Sprintf("msgp: type %q not supported", e.T)
	if e.ctx != "" {
		out += " at " + e.ctx
	}
	return out
}

// Resumable returns 'true' for ErrUnsupportedType
func (e *ErrUnsupportedType) Resumable() bool { return true }
|
||||
|
||||
func (e *ErrUnsupportedType) withContext(ctx string) error {
|
||||
o := *e
|
||||
o.ctx = addCtx(o.ctx, ctx)
|
||||
return &o
|
||||
}
|
||||
|
|
|
@ -685,7 +685,7 @@ func (mw *Writer) WriteIntf(v interface{}) error {
|
|||
case reflect.Map:
|
||||
return mw.writeMap(val)
|
||||
}
|
||||
return &ErrUnsupportedType{val.Type()}
|
||||
return &ErrUnsupportedType{T: val.Type()}
|
||||
}
|
||||
|
||||
func (mw *Writer) writeMap(v reflect.Value) (err error) {
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue