Commit 039a2df2 authored by Andrew Kimball's avatar Andrew Kimball

opt: Fix panic in SQLSmith query

A SQLSmith query exposed a bug in the expression interner. This bug caused
the interner to consider empty literal arrays to be equal, even if their
static types are different. For example:

  ARRAY[]:::string[]
  ARRAY[]:::int[]

An empty string array should not be treated as the same as an empty int
array. The fix is to consult the static type of the array in addition to the
types of its elements (but only when there are zero elements or one of the
elements is null).

In addition, I took the opportunity to make the existing Tuple interner code
faster, by only comparing static types when lables or nulls are present. I
also fixed the bytes reuse code by storing back resized byte arrays.

Fixes #34439

Release note: None
parent 48b686f3
......@@ -363,16 +363,32 @@ func (h *hasher) HashDatum(val tree.Datum) {
case *tree.DJSON:
h.HashString(t.String())
case *tree.DTuple:
for _, d := range t.D {
h.HashDatum(d)
}
h.HashDatumType(t.ResolvedType())
// If labels are present, then hash of tuple's static type is needed to
// disambiguate when everything is the same except labels.
alwaysHashType := len(t.ResolvedType().(types.TTuple).Labels) != 0
h.hashDatumsWithType(t.D, t.ResolvedType(), alwaysHashType)
case *tree.DArray:
for _, d := range t.Array {
h.HashDatum(d)
}
// If the array is empty, then hash of tuple's static type is needed to
// disambiguate.
alwaysHashType := len(t.Array) == 0
h.hashDatumsWithType(t.Array, t.ResolvedType(), alwaysHashType)
default:
h.HashBytes(encodeDatum(h.bytes[:0], val))
h.bytes = encodeDatum(h.bytes[:0], val)
h.HashBytes(h.bytes)
}
}
func (h *hasher) hashDatumsWithType(datums tree.Datums, typ types.T, alwaysHashType bool) {
for _, d := range datums {
if d == tree.DNull {
// At least one NULL exists, so need to compare static types (e.g. a
// NULL::int is indistinguishable from NULL::string).
alwaysHashType = true
}
h.HashDatum(d)
}
if alwaysHashType {
h.HashDatumType(typ)
}
}
......@@ -381,9 +397,10 @@ func (h *hasher) HashDatumType(val types.T) {
}
func (h *hasher) HashColType(val coltypes.T) {
buf := bytes.NewBuffer(h.bytes)
buf := bytes.NewBuffer(h.bytes[:0])
val.Format(buf, lex.EncNoFlags)
h.HashBytes(buf.Bytes())
h.bytes = buf.Bytes()
h.HashBytes(h.bytes)
}
func (h *hasher) HashTypedExpr(val tree.TypedExpr) {
......@@ -579,11 +596,13 @@ func (h *hasher) IsDatumTypeEqual(l, r types.T) bool {
}
func (h *hasher) IsColTypeEqual(l, r coltypes.T) bool {
lbuf := bytes.NewBuffer(h.bytes)
lbuf := bytes.NewBuffer(h.bytes[:0])
l.Format(lbuf, lex.EncNoFlags)
rbuf := bytes.NewBuffer(h.bytes2)
rbuf := bytes.NewBuffer(h.bytes2[:0])
r.Format(rbuf, lex.EncNoFlags)
return bytes.Equal(lbuf.Bytes(), rbuf.Bytes())
h.bytes = lbuf.Bytes()
h.bytes2 = rbuf.Bytes()
return bytes.Equal(h.bytes, h.bytes2)
}
func (h *hasher) IsDatumEqual(l, r tree.Datum) bool {
......@@ -622,37 +641,56 @@ func (h *hasher) IsDatumEqual(l, r tree.Datum) bool {
}
case *tree.DTuple:
if rt, ok := r.(*tree.DTuple); ok {
if len(lt.D) != len(rt.D) {
// Compare datums and then compare static types if nulls or labels
// are present.
ltyp := lt.ResolvedType().(types.TTuple)
rtyp := rt.ResolvedType().(types.TTuple)
if !h.areDatumsWithTypeEqual(lt.D, rt.D, ltyp, rtyp) {
return false
}
for i := range lt.D {
if !h.IsDatumEqual(lt.D[i], rt.D[i]) {
return false
}
}
return h.IsDatumTypeEqual(l.ResolvedType(), r.ResolvedType())
return len(ltyp.Labels) == 0 || h.IsDatumTypeEqual(ltyp, rtyp)
}
case *tree.DArray:
if rt, ok := r.(*tree.DArray); ok {
if len(lt.Array) != len(rt.Array) {
// Compare datums and then compare static types if nulls are present
// or if arrays are empty.
ltyp := lt.ResolvedType()
rtyp := rt.ResolvedType()
if !h.areDatumsWithTypeEqual(lt.Array, rt.Array, ltyp, rtyp) {
return false
}
for i := range lt.Array {
if !h.IsDatumEqual(lt.Array[i], rt.Array[i]) {
return false
}
}
return true
return len(lt.Array) != 0 || h.IsDatumTypeEqual(ltyp, rtyp)
}
default:
lb := encodeDatum(h.bytes[:0], l)
rb := encodeDatum(h.bytes2[:0], r)
return bytes.Equal(lb, rb)
h.bytes = encodeDatum(h.bytes[:0], l)
h.bytes2 = encodeDatum(h.bytes2[:0], r)
return bytes.Equal(h.bytes, h.bytes2)
}
return false
}
func (h *hasher) areDatumsWithTypeEqual(ldatums, rdatums tree.Datums, ltyp, rtyp types.T) bool {
if len(ldatums) != len(rdatums) {
return false
}
foundNull := false
for i := range ldatums {
if !h.IsDatumEqual(ldatums[i], rdatums[i]) {
return false
}
if ldatums[i] == tree.DNull {
// At least one NULL exists, so need to compare static types (e.g. a
// NULL::int is indistinguishable from NULL::string).
foundNull = true
}
}
if foundNull {
return h.IsDatumTypeEqual(ltyp, rtyp)
}
return true
}
func (h *hasher) IsTypedExprEqual(l, r tree.TypedExpr) bool {
return l == r
}
......
......@@ -42,11 +42,16 @@ func TestInterner(t *testing.T) {
tupTyp2 := types.TTuple{Types: []types.T{types.Int, types.String}, Labels: []string{"a", "b"}}
tupTyp3 := types.TTuple{Types: []types.T{types.Int, types.String}}
tupTyp4 := types.TTuple{Types: []types.T{types.Int, types.String, types.Bool}}
tupTyp5 := types.TTuple{Types: []types.T{types.Int, types.String}, Labels: []string{"c", "d"}}
tupTyp6 := types.TTuple{Types: []types.T{types.String, types.Int}, Labels: []string{"c", "d"}}
tup1 := tree.NewDTuple(tupTyp1, tree.NewDInt(100), tree.NewDString("foo"))
tup2 := tree.NewDTuple(tupTyp2, tree.NewDInt(100), tree.NewDString("foo"))
tup3 := tree.NewDTuple(tupTyp3, tree.NewDInt(100), tree.NewDString("foo"))
tup4 := tree.NewDTuple(tupTyp4, tree.NewDInt(100), tree.NewDString("foo"), tree.DBoolTrue)
tup5 := tree.NewDTuple(tupTyp5, tree.NewDInt(100), tree.NewDString("foo"))
tup6 := tree.NewDTuple(tupTyp5, tree.DNull, tree.DNull)
tup7 := tree.NewDTuple(tupTyp6, tree.DNull, tree.DNull)
arr1 := tree.NewDArray(tupTyp1)
arr1.Array = tree.Datums{tup1, tup2}
......@@ -54,6 +59,14 @@ func TestInterner(t *testing.T) {
arr2.Array = tree.Datums{tup2, tup1}
arr3 := tree.NewDArray(tupTyp3)
arr3.Array = tree.Datums{tup2, tup3}
arr4 := tree.NewDArray(types.Int)
arr4.Array = tree.Datums{tree.DNull}
arr5 := tree.NewDArray(types.String)
arr5.Array = tree.Datums{tree.DNull}
arr6 := tree.NewDArray(types.Int)
arr6.Array = tree.Datums{}
arr7 := tree.NewDArray(types.String)
arr7.Array = tree.Datums{}
dec1, _ := tree.ParseDDecimal("1.0")
dec2, _ := tree.ParseDDecimal("1.0")
......@@ -187,9 +200,14 @@ func TestInterner(t *testing.T) {
{val1: tup1, val2: tup2, equal: true},
{val1: tup2, val2: tup3, equal: false},
{val1: tup3, val2: tup4, equal: false},
{val1: tup1, val2: tup5, equal: false},
{val1: tup6, val2: tup7, equal: false},
{val1: arr1, val2: arr2, equal: true},
{val1: arr2, val2: arr3, equal: false},
{val1: arr4, val2: arr5, equal: false},
{val1: arr4, val2: arr6, equal: false},
{val1: arr6, val2: arr7, equal: false},
{val1: dec1, val2: dec2, equal: true},
{val1: dec2, val2: dec3, equal: false},
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment