Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Range Query Optimization (For sequential Vindex types) #17342

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions go/vt/key/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ func Empty(id []byte) bool {
// KeyRange helper methods
//

// NewKeyRange returns a pointer to a new topodatapb.KeyRange whose Start and
// End fields are set to the given byte slices. The slices are stored as-is,
// without copying.
func NewKeyRange(start, end []byte) *topodatapb.KeyRange {
	keyRange := new(topodatapb.KeyRange)
	keyRange.Start = start
	keyRange.End = end
	return keyRange
}

// KeyRangeAdd adds two adjacent KeyRange values (in any order) into a single value. If the values are not adjacent,
// it returns false.
func KeyRangeAdd(a, b *topodatapb.KeyRange) (*topodatapb.KeyRange, bool) {
Expand Down
43 changes: 43 additions & 0 deletions go/vt/vtgate/engine/routing.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ const (
// IN is for routing a statement to a multi shard.
// Requires: A Vindex, and a multi Values.
IN
// Between is for routing a statement to a multi shard
// Requires: A Vindex, and start and end Value.
Between
// MultiEqual is used for routing queries with IN with tuple clause
// Requires: A Vindex, and a multi Tuple Values.
MultiEqual
Expand Down Expand Up @@ -78,6 +81,7 @@ var opName = map[Opcode]string{
EqualUnique: "EqualUnique",
Equal: "Equal",
IN: "IN",
Between: "Between",
MultiEqual: "MultiEqual",
Scatter: "Scatter",
DBA: "DBA",
Expand Down Expand Up @@ -157,6 +161,14 @@ func (rp *RoutingParameters) findRoute(ctx context.Context, vcursor VCursor, bin
default:
return rp.in(ctx, vcursor, bindVars)
}
case Between:
switch rp.Vindex.(type) {
case vindexes.SingleColumn:
return rp.between(ctx, vcursor, bindVars)
default:
// Only SingleColumn vindex supported.
return nil, nil, vterrors.VT13001("between supported on SingleColumn vindex only")
}
case MultiEqual:
switch rp.Vindex.(type) {
case vindexes.MultiColumn:
Expand Down Expand Up @@ -396,6 +408,19 @@ func (rp *RoutingParameters) inMultiCol(ctx context.Context, vcursor VCursor, bi
return rss, shardVarsMultiCol(bindVars, mapVals, isSingleVal), nil
}

// between resolves the shards for a Between route.
//
// It evaluates rp.Values[0] against bindVars, passes the resulting tuple
// values to resolveShardsBetween together with the route's vindex and
// keyspace, and returns the resolved shards along with the per-shard bind
// variables built by shardVars.
//
// An error is returned when the route's vindex does not implement
// vindexes.Sequential, when the expression cannot be evaluated, or when shard
// resolution fails.
func (rp *RoutingParameters) between(ctx context.Context, vcursor VCursor, bindVars map[string]*querypb.BindVariable) ([]*srvtopo.ResolvedShard, []map[string]*querypb.BindVariable, error) {
	// findRoute only checks for vindexes.SingleColumn before dispatching here,
	// so use a checked assertion: a vindex without range support must surface
	// as an error rather than as a panic.
	seqVindex, ok := rp.Vindex.(vindexes.Sequential)
	if !ok {
		return nil, nil, vterrors.VT13001("between supported on Sequential vindex only")
	}

	env := evalengine.NewExpressionEnv(ctx, bindVars, vcursor)
	value, err := env.Evaluate(rp.Values[0])
	if err != nil {
		return nil, nil, err
	}

	rss, values, err := resolveShardsBetween(ctx, vcursor, seqVindex, rp.Keyspace, value.TupleValues())
	if err != nil {
		return nil, nil, err
	}
	return rss, shardVars(bindVars, values), nil
}

func (rp *RoutingParameters) multiEqual(ctx context.Context, vcursor VCursor, bindVars map[string]*querypb.BindVariable) ([]*srvtopo.ResolvedShard, []map[string]*querypb.BindVariable, error) {
env := evalengine.NewExpressionEnv(ctx, bindVars, vcursor)
value, err := env.Evaluate(rp.Values[0])
Expand Down Expand Up @@ -520,6 +545,24 @@ func buildMultiColumnVindexValues(shardsValues [][][]sqltypes.Value) [][][]*quer
return shardsIds
}

// resolveShardsBetween resolves the shards covering a range of vindex keys.
//
// vindexKeys must hold at least two values: vindexKeys[0] is passed to
// vindex.RangeMap as the start of the range and vindexKeys[1] as its end. The
// destinations returned by RangeMap are then handed to
// vcursor.ResolveDestinations for the given keyspace, together with the
// protobuf form of every entry in vindexKeys.
//
// An error is returned when fewer than two keys are supplied, or when RangeMap
// or ResolveDestinations fails.
func resolveShardsBetween(ctx context.Context, vcursor VCursor, vindex vindexes.Sequential, keyspace *vindexes.Keyspace, vindexKeys []sqltypes.Value) ([]*srvtopo.ResolvedShard, [][]*querypb.Value, error) {
	// Both bounds are indexed below; reject short input instead of panicking
	// with an index-out-of-range.
	if len(vindexKeys) < 2 {
		return nil, nil, vterrors.VT13001("between requires a start and an end value")
	}

	// Convert vindexKeys to []*querypb.Value.
	ids := make([]*querypb.Value, len(vindexKeys))
	for i, vik := range vindexKeys {
		ids[i] = sqltypes.ValueToProto(vik)
	}

	// Map the [start, end] pair to destinations using the vindex.
	destinations, err := vindex.RangeMap(ctx, vcursor, vindexKeys[0], vindexKeys[1])
	if err != nil {
		return nil, nil, err
	}

	// NOTE(review): ids has one entry per bound while destinations has
	// whatever RangeMap returned; confirm ResolveDestinations accepts slices
	// of differing length.
	return vcursor.ResolveDestinations(ctx, keyspace.Name, ids, destinations)
}

func shardVars(bv map[string]*querypb.BindVariable, mapVals [][]*querypb.Value) []map[string]*querypb.BindVariable {
shardVars := make([]map[string]*querypb.BindVariable, len(mapVals))
for i, vals := range mapVals {
Expand Down
32 changes: 32 additions & 0 deletions go/vt/vtgate/planbuilder/operators/sharded_routing.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ func (tr *ShardedRouting) resetRoutingLogic(ctx *plancontext.PlanningContext) Ro
func (tr *ShardedRouting) searchForNewVindexes(ctx *plancontext.PlanningContext, predicate sqlparser.Expr) (Routing, bool) {
newVindexFound := false
switch node := predicate.(type) {
case *sqlparser.BetweenExpr:
return tr.planBetweenOp(ctx, node)

case *sqlparser.ComparisonExpr:
return tr.planComparison(ctx, node)

Expand All @@ -234,6 +237,35 @@ func (tr *ShardedRouting) searchForNewVindexes(ctx *plancontext.PlanningContext,
return nil, newVindexFound
}

// planBetweenOp tries to use a vindex to route a `col BETWEEN from AND to`
// predicate.
//
// The predicate is only considered when its left-hand side is a plain column
// and it is not negated. The two bounds are packed into a ValTuple that is
// offered to haveMatchingVindex as the routing value. Only vindexes that
// implement vindexes.Sequential are offered for routing; for those the opcode
// is engine.Between, for any other vindex it is engine.Scatter.
//
// The returned Routing is always nil; foundNew reports whether
// haveMatchingVindex found a usable vindex.
func (tr *ShardedRouting) planBetweenOp(ctx *plancontext.PlanningContext, node *sqlparser.BetweenExpr) (routing Routing, foundNew bool) {
	// NOT BETWEEN shares this AST node but selects the complement of the
	// range, so it must never be routed to the shards covering [from, to].
	if !node.IsBetween {
		return nil, false
	}

	column, ok := node.Left.(*sqlparser.ColName)
	if !ok {
		return nil, false
	}
	vdValue := sqlparser.ValTuple{node.From, node.To}

	// isSequential reports whether the column vindex supports range mapping.
	isSequential := func(vindex *vindexes.ColumnVindex) bool {
		_, ok := vindex.Vindex.(vindexes.Sequential)
		return ok
	}

	opcode := func(vindex *vindexes.ColumnVindex) engine.Opcode {
		if isSequential(vindex) {
			return engine.Between
		}
		return engine.Scatter
	}

	sequentialVdx := func(vindex *vindexes.ColumnVindex) vindexes.Vindex {
		if isSequential(vindex) {
			return vindex.Vindex
		}
		// if vindex is not of type Sequential, we can't use this vindex at all
		return nil
	}

	val := makeEvalEngineExpr(ctx, vdValue)
	if val == nil {
		return nil, false
	}
	return nil, tr.haveMatchingVindex(ctx, node, vdValue, column, val, opcode, sequentialVdx)
}

func (tr *ShardedRouting) planComparison(ctx *plancontext.PlanningContext, cmp *sqlparser.ComparisonExpr) (routing Routing, foundNew bool) {
switch cmp.Operator {
case sqlparser.EqualOp:
Expand Down
2 changes: 2 additions & 0 deletions go/vt/vtgate/planbuilder/plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ func (s *planTestSuite) TestPlan() {
s.addPKsProvided(vschema, "user", []string{"user_extra"}, []string{"id", "user_id"})
s.addPKsProvided(vschema, "ordering", []string{"order"}, []string{"oid", "region_id"})
s.addPKsProvided(vschema, "ordering", []string{"order_event"}, []string{"oid", "ename"})
s.addPKsProvided(vschema, "sequential", []string{"seq_tbl"}, []string{"id"})
s.addPKsProvided(vschema, "sequential", []string{"seq_multicol_tbl"}, []string{"cola", "colb"})

// You will notice that some tests expect user.Id instead of user.id.
// This is because we now pre-create vindex columns in the symbol
Expand Down
95 changes: 95 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/filter_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -4820,5 +4820,100 @@
"user.authoritative"
]
}
},
{
"comment": "Between clause on primary indexed id column (binary vindex on id)",
"query": "select id from seq_tbl where id between 1 and 5",
"plan": {
"QueryType": "SELECT",
"Original": "select id from seq_tbl where id between 1 and 5",
"Instructions": {
"OperatorType": "Route",
"Variant": "Between",
"Keyspace": {
"Name": "sequential",
"Sharded": true
},
"FieldQuery": "select id from seq_tbl where 1 != 1",
"Query": "select id from seq_tbl where id between 1 and 5",
"Table": "seq_tbl",
"Values": [
"(1, 5)"
],
"Vindex": "binary"
},
"TablesUsed": [
"sequential.seq_tbl"
]
}
},
{
"comment": "Between clause on group_id column (xxhash vindex on group_id)",
"query": "select id, group_id from seq_tbl where group_id between 1 and 5",
"plan": {
"QueryType": "SELECT",
"Original": "select id, group_id from seq_tbl where group_id between 1 and 5",
"Instructions": {
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "sequential",
"Sharded": true
},
"FieldQuery": "select id, group_id from seq_tbl where 1 != 1",
"Query": "select id, group_id from seq_tbl where group_id between 1 and 5",
"Table": "seq_tbl"
},
"TablesUsed": [
"sequential.seq_tbl"
]
}
},
{
"comment": "Between clause on col2 column (there is no vindex on this column)",
"query": "select id, col2 from seq_tbl where col2 between 10 and 50",
"plan": {
"QueryType": "SELECT",
"Original": "select id, col2 from seq_tbl where col2 between 10 and 50",
"Instructions": {
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "sequential",
"Sharded": true
},
"FieldQuery": "select id, col2 from seq_tbl where 1 != 1",
"Query": "select id, col2 from seq_tbl where col2 between 10 and 50",
"Table": "seq_tbl"
},
"TablesUsed": [
"sequential.seq_tbl"
]
}
},
{
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice set of planner tests. could you add at least one test where the values in the between are not literal values, but instead columns from another table. something like:

select * 
from tblA 
  join tblB on tblA.foo = tblB.bar 
where tblA.X between tblB.C and tblB.D

it'd be good to make sure that this also works as expected.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for suggesting this , added a test case now 0697cb0 , and also fixed missing Cost for RoutOpCode.Between.

"comment": "Between clause on multicolumn vindex (cola,colb)",
"query": "select cola,colb,col1 from seq_multicol_tbl where cola between 1 and 5",
"plan": {
"QueryType": "SELECT",
"Original": "select cola,colb,col1 from seq_multicol_tbl where cola between 1 and 5",
"Instructions": {
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "sequential",
"Sharded": true
},
"FieldQuery": "select cola, colb, col1 from seq_multicol_tbl where 1 != 1",
"Query": "select cola, colb, col1 from seq_multicol_tbl where cola between 1 and 5",
"Table": "seq_multicol_tbl",
"Values": [
"(1, 5)"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why we get the values field here for a Scatter route. it doesn't look right

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for catching this. I noticed that processSingleColumnVindex has a few extra early checks compared to processMultiColumnVindex. With 8a0ded0, I added checks for the vindex and also the routingCode before processing any further.

This ensures Values is not set in case of Scatter route.

Please help to review if addition of these checks has any other side effects (which i may have missed) , ensured all test cases are passing.

]
},
"TablesUsed": [
"sequential.seq_multicol_tbl"
]
}
}
]
55 changes: 55 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/vschemas/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,61 @@
]
}
}
},
"sequential": {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure, but I'm thinking we could use an existing keyspace instead of adding a new one. feel free to add a vindex or even a table if you need to.
I'm just thinking of not adding unnecessary complexity if we don't have to

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure thing , incorporated with 5a7c8f1 , using user keyspace - added extra vindex and table

"sharded": true,
"vindexes": {
"xxhash": {
"type": "xxhash"
},
"binary": {
"type": "binary"
},
"multicolIdx": {
"type": "multiCol_test"
}
},
"tables": {
"seq_tbl": {
"column_vindexes": [
{
"column": "id",
"name": "binary"
},
{
"column": "group_id",
"name": "xxhash"
}
],
"columns": [
{
"name": "col1",
"type": "VARCHAR"
},
{
"name": "col2",
"type": "INT16"
}
]
},
"seq_multicol_tbl": {
"column_vindexes": [
{
"columns": [
"cola",
"colb"
],
"name": "multicolIdx"
}
],
"columns" : [
{
"name": "col1",
"type": "INT16"
}
]
}
}
}
}
}
15 changes: 15 additions & 0 deletions go/vt/vtgate/vindexes/binary.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ var (
_ Reversible = (*Binary)(nil)
_ Hashing = (*Binary)(nil)
_ ParamValidating = (*Binary)(nil)
_ Sequential = (*Binary)(nil)
)

// Binary is a vindex that converts binary bits to a keyspace id.
Expand Down Expand Up @@ -108,6 +109,20 @@ func (*Binary) ReverseMap(_ VCursor, ksids [][]byte) ([]sqltypes.Value, error) {
return reverseIds, nil
}

// RangeMap implements the Sequential interface for Binary. It hashes startId
// and endId with vind.Hash and returns a single key.DestinationKeyRange whose
// key range starts at the first hash and ends at the second. ctx and vcursor
// are not used. An error from either Hash call is returned unchanged.
//
// NOTE(review): SQL BETWEEN includes its upper bound; confirm that the key
// range built here also covers endId's keyspace id (presumably KeyRange.End is
// exclusive, which would miss a shard starting exactly at that id).
func (vind *Binary) RangeMap(ctx context.Context, vcursor VCursor, startId sqltypes.Value, endId sqltypes.Value) ([]key.Destination, error) {
	lower, err := vind.Hash(startId)
	if err != nil {
		return nil, err
	}
	upper, err := vind.Hash(endId)
	if err != nil {
		return nil, err
	}
	dest := &key.DestinationKeyRange{KeyRange: key.NewKeyRange(lower, upper)}
	return []key.Destination{dest}, nil
}

// UnknownParams implements the ParamValidating interface.
func (vind *Binary) UnknownParams() []string {
return vind.unknownParams
Expand Down
18 changes: 18 additions & 0 deletions go/vt/vtgate/vindexes/binary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"reflect"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"vitess.io/vitess/go/sqltypes"
Expand Down Expand Up @@ -145,3 +146,20 @@ func TestBinaryReverseMap(t *testing.T) {
t.Errorf("ReverseMap(): %v, want %s", err, wantErr)
}
}

// TestBinaryRangeMap takes start and end values,
// and checks against a destination keyrange.
func TestBinaryRangeMap(t *testing.T) {
systay marked this conversation as resolved.
Show resolved Hide resolved

startInterval := "0x01"
endInterval := "0x10"

got, err := binOnlyVindex.(Sequential).RangeMap(context.Background(), nil, sqltypes.NewHexNum([]byte(startInterval)),
sqltypes.NewHexNum([]byte(endInterval)))
require.NoError(t, err)
want := "DestinationKeyRange(01-10)"
if !assert.Equal(t, got[0].String(), want) {
t.Errorf("RangeMap(): %+v, want %+v", got, want)
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if !assert.Equal(t, got[0].String(), want) {
t.Errorf("RangeMap(): %+v, want %+v", got, want)
}
assert.Equal(t, want, got[0].String())

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks , done with 5a7c8f1


}
15 changes: 15 additions & 0 deletions go/vt/vtgate/vindexes/numeric.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ var (
_ Reversible = (*Numeric)(nil)
_ Hashing = (*Numeric)(nil)
_ ParamValidating = (*Numeric)(nil)
_ Sequential = (*Numeric)(nil)
)

// Numeric defines a bit-pattern mapping of a uint64 to the KeyspaceId.
Expand Down Expand Up @@ -108,6 +109,20 @@ func (*Numeric) ReverseMap(_ VCursor, ksids [][]byte) ([]sqltypes.Value, error)
return reverseIds, nil
}

// RangeMap implements the Sequential interface for Numeric. It hashes startId
// and endId with vind.Hash and returns a single key.DestinationKeyRange whose
// key range starts at the first hash and ends at the second. ctx and vcursor
// are not used. An error from either Hash call is returned unchanged.
//
// NOTE(review): SQL BETWEEN includes its upper bound; confirm that the key
// range built here also covers endId's keyspace id (presumably KeyRange.End is
// exclusive, which would miss a shard starting exactly at that id).
func (vind *Numeric) RangeMap(ctx context.Context, vcursor VCursor, startId sqltypes.Value, endId sqltypes.Value) ([]key.Destination, error) {
	lower, err := vind.Hash(startId)
	if err != nil {
		return nil, err
	}
	upper, err := vind.Hash(endId)
	if err != nil {
		return nil, err
	}
	dest := &key.DestinationKeyRange{KeyRange: key.NewKeyRange(lower, upper)}
	return []key.Destination{dest}, nil
}

// UnknownParams implements the ParamValidating interface.
func (vind *Numeric) UnknownParams() []string {
return vind.unknownParams
Expand Down
Loading