From fd7ff09ff91252b694ea84a29e42387251e53118 Mon Sep 17 00:00:00 2001 From: Zhou Kunqin <25057648+time-and-fate@users.noreply.github.com> Date: Wed, 13 Sep 2023 23:22:39 +0800 Subject: [PATCH] *: support collect and async load stats for mv index (#46731) ref pingcap/tidb#46539 --- executor/analyze_idx.go | 17 +- executor/test/analyzetest/analyze_test.go | 258 +++++++++++++++++++++- planner/cardinality/row_count_index.go | 6 +- planner/core/indexmerge_path_test.go | 8 +- planner/core/planbuilder.go | 40 ++-- planner/core/stats.go | 2 +- statistics/BUILD.bazel | 2 +- statistics/analyze.go | 16 ++ statistics/handle/handle.go | 43 +++- statistics/histogram.go | 39 ++++ statistics/histogram_test.go | 143 ++++++++++++ statistics/table.go | 8 +- 12 files changed, 547 insertions(+), 35 deletions(-) diff --git a/executor/analyze_idx.go b/executor/analyze_idx.go index 257127ef09c19..ba82dfe937ea7 100644 --- a/executor/analyze_idx.go +++ b/executor/analyze_idx.go @@ -66,13 +66,15 @@ func analyzeIndexPushdown(idxExec *AnalyzeIndexExec) *statistics.AnalyzeResults if idxExec.analyzePB.IdxReq.Version != nil { statsVer = int(*idxExec.analyzePB.IdxReq.Version) } - result := &statistics.AnalyzeResult{ + idxResult := &statistics.AnalyzeResult{ Hist: []*statistics.Histogram{hist}, - Cms: []*statistics.CMSketch{cms}, TopNs: []*statistics.TopN{topN}, Fms: []*statistics.FMSketch{fms}, IsIndex: 1, } + if statsVer != statistics.Version2 { + idxResult.Cms = []*statistics.CMSketch{cms} + } cnt := hist.NullCount if hist.Len() > 0 { cnt += hist.Buckets[hist.Len()-1].Count @@ -80,14 +82,18 @@ func analyzeIndexPushdown(idxExec *AnalyzeIndexExec) *statistics.AnalyzeResults if topN.TotalCount() > 0 { cnt += int64(topN.TotalCount()) } - return &statistics.AnalyzeResults{ + result := &statistics.AnalyzeResults{ TableID: idxExec.tableID, - Ars: []*statistics.AnalyzeResult{result}, + Ars: []*statistics.AnalyzeResult{idxResult}, Job: idxExec.job, StatsVer: statsVer, Count: cnt, Snapshot: idxExec.snapshot, } + if idxExec.idxInfo.MVIndex { + result.ForMVIndex = true + } + return result } func (e *AnalyzeIndexExec) buildStats(ranges []*ranger.Range, considerNull bool) (hist *statistics.Histogram, cms *statistics.CMSketch, fms *statistics.FMSketch, topN *statistics.TopN, err error) { @@ -224,6 +230,9 @@ func (e *AnalyzeIndexExec) buildStatsFromResult(result distsql.SelectResult, nee if needCMS && topn.TotalCount() > 0 { hist.RemoveVals(topn.TopN) } + if statsVer == statistics.Version2 { + hist.StandardizeForV2AnalyzeIndex() + } if needCMS && cms != nil { cms.CalcDefaultValForAnalyze(uint64(hist.NDV)) } diff --git a/executor/test/analyzetest/analyze_test.go b/executor/test/analyzetest/analyze_test.go index adf57bc0dc96a..912ae3466a4bc 100644 --- a/executor/test/analyzetest/analyze_test.go +++ b/executor/test/analyzetest/analyze_test.go @@ -16,6 +16,7 @@ package analyzetest import ( "context" + "encoding/json" "fmt" "strconv" "strings" @@ -3060,9 +3061,12 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) { tk.MustQuery("show warnings").Sort().Check(testkit.Rows(""+ "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", "Warning 1105 Columns b are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats", - "Warning 1105 analyzing multi-valued indexes is not supported, skip idx_c")) - tk.MustQuery("select job_info from mysql.analyze_jobs where table_schema = 'test' and 
table_name = 't'").Check(testkit.Rows( - "analyze table columns a, b with 256 buckets, 500 topn, 1 samplerate")) + )) + tk.MustQuery("select job_info from mysql.analyze_jobs where table_schema = 'test' and table_name = 't'").Sort().Check( + testkit.Rows( + "analyze index idx_c", + "analyze table columns a, b with 256 buckets, 500 topn, 1 samplerate", + )) is := dom.InfoSchema() tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) @@ -3073,7 +3077,7 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) { require.True(t, stats.Columns[tblInfo.Columns[1].ID].IsStatsInitialized()) require.False(t, stats.Columns[tblInfo.Columns[2].ID].IsStatsInitialized()) require.True(t, stats.Indices[tblInfo.Indices[0].ID].IsStatsInitialized()) - require.False(t, stats.Indices[tblInfo.Indices[1].ID].IsStatsInitialized()) + require.True(t, stats.Indices[tblInfo.Indices[1].ID].IsStatsInitialized()) } func TestManualAnalyzeSkipColumnTypes(t *testing.T) { @@ -3108,3 +3112,249 @@ func TestAutoAnalyzeSkipColumnTypes(t *testing.T) { require.True(t, h.HandleAutoAnalyze(dom.InfoSchema())) tk.MustQuery("select job_info from mysql.analyze_jobs where job_info like '%auto analyze table%'").Check(testkit.Rows("auto analyze table columns a, b, d with 256 buckets, 500 topn, 1 samplerate")) } + +// TestAnalyzeMVIndex tests analyzing the mv index use some real data in the table. +// It checks the analyze jobs, async loading and the stats content in the memory. +func TestAnalyzeMVIndex(t *testing.T) { + // 1. prepare the table and insert data + store, dom := testkit.CreateMockStoreAndDomain(t) + h := dom.StatsHandle() + oriLease := h.Lease() + h.SetLease(1) + defer func() { + h.SetLease(oriLease) + }() + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a int, j json, index ia(a)," + + "index ij_signed((cast(j->'$.signed' as signed array)))," + + "index ij_unsigned((cast(j->'$.unsigned' as unsigned array)))," + + // date currently incompatible with mysql + //"index ij_date((cast(j->'$.dt' as date array)))," + + // datetime currently incompatible with mysql + //"index ij_datetime((cast(j->'$.dttm' as datetime(6) array)))," + + // time currently incompatible with mysql + //"index ij_time((cast(j->'$.tm' as time(6) array)))," + + "index ij_double((cast(j->'$.dbl' as double array)))," + + // decimal not supported yet + //"index ij_decimal((cast(j->'$.dcm' as decimal(15,5) array)))," + + "index ij_binary((cast(j->'$.bin' as binary(50) array)))," + + "index ij_char((cast(j->'$.char' as char(50) array)))" + + ")") + require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh())) + jsonData := []map[string]interface{}{ + { + "signed": []int64{1, 2, 300, 300, 0, 4, 5, -40000}, + "unsigned": []uint64{0, 3, 4, 600, 12}, + "dt": []string{"2020-01-23", "2021-03-21", "2011-11-11", "2015-06-18", "1990-03-21", "2050-12-12"}, + "dttm": []string{"2021-01-11 12:00:00.123456", "2025-05-15 15:50:00.5", "2020-01-01 18:17:16.555", "2100-01-01 15:16:17", "1950-01-01 00:00:00.00008"}, + "tm": []string{"100:00:30.5", "-321:00:01.16"}, + "dbl": []float64{-21.5, 2.15, 10.555555, 0.000005, 0.00}, + "dcm": []float64{1.1, 2.2, 10.1234, -12.34, -1000.56789}, + "bin": []string{"aaaaaa", "bbbb", "ppp", "ccc", "asdf", "qwer", "yuiop", "1234", "5678", "0000", "zzzz"}, + "char": []string{"aaa", "cccccc", "eee", "asdf", "qwer", "yuiop", "!@#$"}, + }, + { + "signed": []int64{1, 2, 300, 300, 0, 4, 5, -40000}, + "unsigned": []uint64{0, 3, 4, 600, 12}, + "dt": 
[]string{"2020-01-23", "2021-03-21", "2011-11-11", "2015-06-18", "1990-03-21", "2050-12-12"}, + "dttm": []string{"2021-01-11 12:00:00.123456", "2025-05-15 15:50:00.5", "2020-01-01 18:17:16.555", "2100-01-01 15:16:17", "1950-01-01 00:00:00.00008"}, + "tm": []string{"100:00:30.5", "-321:00:01.16", "09:11:47", "8:50.10"}, + "dbl": []float64{-21.5, 2.15, 10.555555, 0.000005, 0.00, 10.9876}, + "dcm": []float64{1.1, 2.2, 10.1234, -12.34, 987.654}, + "bin": []string{"aaaaaa", "bbbb", "ppp", "ccc", "asdf", "qwer", "ghjk", "0000", "zzzz"}, + "char": []string{"aaa", "cccccc", "eee", "asdf", "qwer", "yuiop", "!@#$"}, + }, + { + "signed": []int64{1, 2, 300, 300, 0, 4, -5, 13245}, + "unsigned": []uint64{0, 3, 4, 600, 3112}, + "dt": []string{"2020-01-23", "2021-03-21", "2011-11-11", "2015-06-18", "1990-03-21", "2050-12-12"}, + "dttm": []string{"2021-01-11 12:00:00.123456", "2025-05-15 15:50:00.5", "2020-01-01 18:17:16.555", "2340-01-01 15:16:17", "1950-01-01 00:00:00.00008"}, + "tm": []string{"100:00:30.5", "-321:00:01.16", "09:11:47", "8:50.10", "1:10:43"}, + "dbl": []float64{-21.5, 2.15, 10.555555, -12.000005, 0.00, 10.9876}, + "dcm": []float64{1.1, 2.2, 10.1234, -12.34, 987.654}, + "bin": []string{"aaaaaa", "bbbb", "ppp", "ccc", "asdf", "qwer", "1234", "0000", "zzzz"}, + "char": []string{"aaa", "cccccc", "eee", "asdf", "qwer", "yuiop", "!@#$"}, + }, + { + "signed": []int64{1, 2, 300, 300, 0, 4, -5, 13245}, + "unsigned": []uint64{0, 3, 4, 600, 3112}, + "dt": []string{"2020-01-23", "2021-03-21", "2011-11-11", "2015-06-18", "1990-03-21", "2050-12-12"}, + "dttm": []string{"2021-01-11 12:00:00.123456", "2025-05-15 15:50:00.5", "2110-01-01 18:17:16", "2340-01-01 15:16:17", "1950-01-01 00:00:00.00008"}, + "tm": []string{"100:00:30.5", "-321:00:01.16", "09:11:47", "8:50.10", "1:10:43"}, + "dbl": []float64{-21.5, 2.15, 10.555555, 0.000005, 0.00, 10.9876}, + "dcm": []float64{1.1, 2.2, 10.1234, -12.34, -123.654}, + "bin": []string{"aaaaaa", "bbbb", "ppp", "ccc", "egfb", "nfre", "1234", "0000", "zzzz"}, + "char": []string{"aaa", "cccccc", "eee", "asdf", "k!@cvd", "yuiop", "%*$%#@qwe"}, + }, + { + "signed": []int64{1, 2, 300, -300, 0, 100, -5, 13245}, + "unsigned": []uint64{0, 3, 4, 600, 3112}, + "dt": []string{"2020-01-23", "2021-03-21", "2011-11-11", "2015-06-18", "1990-03-21", "2050-12-12"}, + "dttm": []string{"2021-01-11 12:00:00.123456", "2025-05-15 15:50:00.5", "2110-01-01 22:17:16", "2340-01-22 15:16:17", "1950-01-01 00:12:00.00008"}, + "tm": []string{"100:00:30.5", "-321:00:01.16", "09:11:47", "8:5.10", "12:4:43"}, + "dbl": []float64{-21.5, 2.15, 10.555555, 0.000005, 0.00, 10.9876}, + "dcm": []float64{1.1, 2.2, 10.1234, -12.34, 987.654}, + "bin": []string{"aaaaaa", "bbbb", "ppp", "ccc", "egfb", "nfre", "1234", "3796", "zzzz"}, + "char": []string{"aaa", "cccccc", "eee", "asdf", "kicvd", "yuiop", "%*asdf@"}, + }, + { + "signed": []int64{1, 2, 300, 300, 0, 4, -5, 13245}, + "unsigned": []uint64{0, 3, 4, 600, 3112}, + "dt": []string{"2020-01-23", "2021-03-21", "2011-11-11", "2015-06-18", "1990-03-21", "2050-12-12"}, + "dttm": []string{"2021-01-11 12:00:00.123456", "2025-05-15 15:50:00.5", "2020-01-01 18:17:16.555", "2100-01-01 15:16:17", "1950-01-01 00:00:00.00008"}, + "tm": []string{"100:00:30.5", "-321:00:01.16", "09:11:47", "8:50.10", "1:10:43"}, + "dbl": []float64{-21.5, 2.15, 10.555555, 0.000005, 0.00, 10.9876}, + "dcm": []float64{1.1, 2.2, 10.1234, -12.34, 987.654}, + "bin": []string{"aaaaaa", "bbbb", "ppp", "ccc", "egfb", "nfre", "1234", "0000", "zzzz"}, + "char": []string{"aaa", "cccccc", "eee", 
"asdf", "k!@cvd", "yuiop", "%*$%#@qwe"}, + }, + } + for i := 0; i < 3; i++ { + jsonValue := jsonData[i] + jsonValueStr, err := json.Marshal(jsonValue) + require.NoError(t, err) + tk.MustExec(fmt.Sprintf("insert into t values (%d, '%s')", 1, jsonValueStr)) + } + tk.MustExec("insert into t select * from t") + tk.MustExec("insert into t select * from t") + tk.MustExec("insert into t select * from t") + for i := 3; i < 6; i++ { + jsonValue := jsonData[i] + jsonValueStr, err := json.Marshal(jsonValue) + require.NoError(t, err) + tk.MustExec(fmt.Sprintf("insert into t values (%d, '%s')", 1, jsonValueStr)) + } + require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll)) + + // 2. analyze and check analyze jobs + tk.MustExec("analyze table t with 1 samplerate, 3 topn") + tk.MustQuery("select id, table_schema, table_name, partition_name, job_info, processed_rows, state from mysql.analyze_jobs order by id"). + Check(testkit.Rows("1 test t analyze table columns a with 256 buckets, 3 topn, 1 samplerate 27 finished", + "2 test t analyze index ij_signed 190 finished", + "3 test t analyze index ij_unsigned 135 finished", + "4 test t analyze index ij_double 154 finished", + "5 test t analyze index ij_binary 259 finished", + "6 test t analyze index ij_char 189 finished", + )) + + // 3. check stats loading status and async load + // 3.1. now, stats on all indexes should be allEvicted, but these queries should trigger async loading + tk.MustQuery("explain format = brief select * from t where 1 member of (j->'$.signed')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_signed(cast(json_extract(`j`, _utf8mb4'$.signed') as signed array)) range:[1,1], keep order:false, stats:partial[ia:allEvicted, ij_signed:allEvicted, j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, ij_signed:allEvicted, j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where 1 member of (j->'$.unsigned')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_unsigned(cast(json_extract(`j`, _utf8mb4'$.unsigned') as unsigned array)) range:[1,1], keep order:false, stats:partial[ia:allEvicted, ij_unsigned:allEvicted, j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, ij_unsigned:allEvicted, j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where 10.01 member of (j->'$.dbl')").Check(testkit.Rows( + "TableReader 21.60 root data:Selection", + "└─Selection 21.60 cop[tikv] json_memberof(cast(10.01, json BINARY), json_extract(test.t.j, \"$.dbl\"))", + " └─TableFullScan 27.00 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where '1' member of (j->'$.bin')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_binary(cast(json_extract(`j`, _utf8mb4'$.bin') as binary(50) array)) range:[0x31,0x31], keep order:false, stats:partial[ia:allEvicted, ij_binary:allEvicted, j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, ij_binary:allEvicted, j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where '1' member of (j->'$.char')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + 
"├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_char(cast(json_extract(`j`, _utf8mb4'$.char') as char(50) array)) range:[0x31,0x31], keep order:false, stats:partial[ia:allEvicted, ij_char:allEvicted, j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[ia:allEvicted, ij_char:allEvicted, j:unInitialized]", + )) + // 3.2. emulate the background async loading + require.NoError(t, h.LoadNeededHistograms()) + // 3.3. now, stats on all indexes should be loaded + tk.MustQuery("explain format = brief select * from t where 1 member of (j->'$.signed')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_signed(cast(json_extract(`j`, _utf8mb4'$.signed') as signed array)) range:[1,1], keep order:false, stats:partial[j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where 1 member of (j->'$.unsigned')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_unsigned(cast(json_extract(`j`, _utf8mb4'$.unsigned') as unsigned array)) range:[1,1], keep order:false, stats:partial[j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where 10.01 member of (j->'$.dbl')").Check(testkit.Rows( + "TableReader 21.60 root data:Selection", + "└─Selection 21.60 cop[tikv] json_memberof(cast(10.01, json BINARY), json_extract(test.t.j, \"$.dbl\"))", + " └─TableFullScan 27.00 cop[tikv] table:t keep order:false, stats:partial[j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where '1' member of (j->'$.bin')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_binary(cast(json_extract(`j`, _utf8mb4'$.bin') as binary(50) array)) range:[0x31,0x31], keep order:false, stats:partial[j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[j:unInitialized]", + )) + tk.MustQuery("explain format = brief select * from t where '1' member of (j->'$.char')").Check(testkit.Rows( + "IndexMerge 0.03 root type: union", + "├─IndexRangeScan(Build) 0.03 cop[tikv] table:t, index:ij_char(cast(json_extract(`j`, _utf8mb4'$.char') as char(50) array)) range:[0x31,0x31], keep order:false, stats:partial[j:unInitialized]", + "└─TableRowIDScan(Probe) 0.03 cop[tikv] table:t keep order:false, stats:partial[j:unInitialized]", + )) + + // 4. 
check stats content in the memory + tk.MustQuery("show stats_meta").CheckAt([]int{0, 1, 4, 5}, testkit.Rows("test t 0 27")) + tk.MustQuery("show stats_histograms").CheckAt([]int{0, 1, 3, 4, 6, 7, 8, 9, 10}, testkit.Rows( + // db_name, table_name, column_name, is_index, distinct_count, null_count, avg_col_size, correlation, load_status + "test t a 0 1 0 1 1 allEvicted", + "test t ia 1 1 0 0 0 allLoaded", + "test t ij_signed 1 11 0 0 0 allLoaded", + "test t ij_unsigned 1 6 0 0 0 allLoaded", + "test t ij_double 1 7 0 0 0 allEvicted", + "test t ij_binary 1 15 0 0 0 allLoaded", + "test t ij_char 1 11 0 0 0 allLoaded", + )) + tk.MustQuery("show stats_topn").Check(testkit.Rows( + // db_name, table_name, partition_name, column_name, is_index, value, count + "test t ia 1 1 27", + "test t ij_signed 1 -40000 16", + "test t ij_signed 1 -300 1", + "test t ij_signed 1 -5 11", + "test t ij_unsigned 1 0 27", + "test t ij_unsigned 1 3 27", + "test t ij_unsigned 1 4 27", + "test t ij_binary 1 0000 26", + "test t ij_binary 1 1234 19", + "test t ij_binary 1 3796 1", + "test t ij_char 1 !@#$ 24", + "test t ij_char 1 %*$%#@qwe 2", + "test t ij_char 1 %*asdf@ 1", + )) + tk.MustQuery("show stats_buckets").Check(testkit.Rows( + // db_name, table_name, partition_name, column_name, is_index, bucket_id, count, repeats, lower_bound, upper_bound, ndv + "test t ij_signed 1 0 27 27 0 0 0", + "test t ij_signed 1 1 54 27 1 1 0", + "test t ij_signed 1 2 81 27 2 2 0", + "test t ij_signed 1 3 107 26 4 4 0", + "test t ij_signed 1 4 123 16 5 5 0", + "test t ij_signed 1 5 124 1 100 100 0", + "test t ij_signed 1 6 151 27 300 300 0", + "test t ij_signed 1 7 162 11 13245 13245 0", + "test t ij_unsigned 1 0 16 16 12 12 0", + "test t ij_unsigned 1 1 43 27 600 600 0", + "test t ij_unsigned 1 2 54 11 3112 3112 0", + "test t ij_binary 1 0 8 8 5678 5678 0", + "test t ij_binary 1 1 35 27 aaaaaa aaaaaa 0", + "test t ij_binary 1 2 59 24 asdf asdf 0", + "test t ij_binary 1 3 86 27 bbbb bbbb 0", + "test t ij_binary 1 4 113 27 ccc ccc 0", + "test t ij_binary 1 5 116 3 egfb egfb 0", + "test t ij_binary 1 6 124 8 ghjk ghjk 0", + "test t ij_binary 1 7 127 3 nfre nfre 0", + "test t ij_binary 1 8 154 27 ppp ppp 0", + "test t ij_binary 1 9 178 24 qwer qwer 0", + "test t ij_binary 1 10 186 8 yuiop yuiop 0", + "test t ij_binary 1 11 213 27 zzzz zzzz 0", + "test t ij_char 1 0 27 27 aaa aaa 0", + "test t ij_char 1 1 54 27 asdf asdf 0", + "test t ij_char 1 2 81 27 cccccc cccccc 0", + "test t ij_char 1 3 108 27 eee eee 0", + "test t ij_char 1 4 110 2 k!@cvd k!@cvd 0", + "test t ij_char 1 5 111 1 kicvd kicvd 0", + "test t ij_char 1 6 135 24 qwer qwer 0", + "test t ij_char 1 7 162 27 yuiop yuiop 0", + )) +} diff --git a/planner/cardinality/row_count_index.go b/planner/cardinality/row_count_index.go index 789972c20ad70..e841636846e89 100644 --- a/planner/cardinality/row_count_index.go +++ b/planner/cardinality/row_count_index.go @@ -49,16 +49,20 @@ func GetRowCountByIndexRanges(sctx sessionctx.Context, coll *statistics.HistColl sc := sctx.GetSessionVars().StmtCtx idx, ok := coll.Indices[idxID] colNames := make([]string, 0, 8) + isMVIndex := false if ok { if idx.Info != nil { name = idx.Info.Name.O for _, col := range idx.Info.Columns { colNames = append(colNames, col.Name.O) } + isMVIndex = idx.Info.MVIndex } } recordUsedItemStatsStatus(sctx, idx, coll.PhysicalID, idxID) - if !ok || idx.IsInvalid(sctx, coll.Pseudo) { + // For the mv index case, now we have supported collecting stats and async loading stats, but sync loading and + // estimation is not 
well-supported, so we keep mv index using pseudo estimation for this period of time. + if !ok || idx.IsInvalid(sctx, coll.Pseudo) || isMVIndex { colsLen := -1 if idx != nil && idx.Info.Unique { colsLen = len(idx.Info.Columns) diff --git a/planner/core/indexmerge_path_test.go b/planner/core/indexmerge_path_test.go index 8774ecd49d79f..bbf59f80e0439 100644 --- a/planner/core/indexmerge_path_test.go +++ b/planner/core/indexmerge_path_test.go @@ -27,7 +27,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestAnalyzeMVIndex(t *testing.T) { +func TestAnalyzeMVIndexWarnings(t *testing.T) { store := testkit.CreateMockStore(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") @@ -40,14 +40,12 @@ index idx2(a, b, (cast(j->'$.str' as char(10) array)), c))`) tk.MustExec("analyze table t") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", - "Warning 1105 analyzing multi-valued indexes is not supported, skip idx", - "Warning 1105 analyzing multi-valued indexes is not supported, skip idx2")) + )) tk.MustExec("analyze table t index idx") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"TiDB assumes that the table is empty, use sample-rate=1\"", "Warning 1105 The version 2 would collect all statistics not only the selected indexes", - "Warning 1105 analyzing multi-valued indexes is not supported, skip idx", - "Warning 1105 analyzing multi-valued indexes is not supported, skip idx2")) + )) tk.MustExec("set tidb_analyze_version=1") tk.MustExec("analyze table t") diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 149a3b0b7bef9..2754d7a0a44c1 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -2439,14 +2439,17 @@ func getColOffsetForAnalyze(colsInfo []*model.ColumnInfo, colID int64) int { // in tblInfo.Indices, index.Columns[i].Offset is set according to tblInfo.Columns. Since we decode row samples according to colsInfo rather than tbl.Columns // in the execution phase of ANALYZE, we need to modify index.Columns[i].Offset according to colInfos. // TODO: find a better way to find indexed columns in ANALYZE rather than use IndexColumn.Offset -func getModifiedIndexesInfoForAnalyze(sctx sessionctx.Context, tblInfo *model.TableInfo, allColumns bool, colsInfo []*model.ColumnInfo) []*model.IndexInfo { +// For multi-valued index, we need to collect it separately here and analyze it as independent index analyze task. +// See comments for AnalyzeResults.ForMVIndex for more details. 
+func getModifiedIndexesInfoForAnalyze(tblInfo *model.TableInfo, allColumns bool, colsInfo []*model.ColumnInfo) ([]*model.IndexInfo, []*model.IndexInfo) { idxsInfo := make([]*model.IndexInfo, 0, len(tblInfo.Indices)) + independentIdxsInfo := make([]*model.IndexInfo, 0) for _, originIdx := range tblInfo.Indices { if originIdx.State != model.StatePublic { continue } if originIdx.MVIndex { - sctx.GetSessionVars().StmtCtx.AppendWarning(errors.Errorf("analyzing multi-valued indexes is not supported, skip %s", originIdx.Name.L)) + independentIdxsInfo = append(independentIdxsInfo, originIdx) continue } if allColumns { @@ -2462,7 +2465,7 @@ func getModifiedIndexesInfoForAnalyze(sctx sessionctx.Context, tblInfo *model.Ta } idxsInfo = append(idxsInfo, idx) } - return idxsInfo + return idxsInfo, independentIdxsInfo } // filterSkipColumnTypes filters out columns whose types are in the skipTypes list. @@ -2501,14 +2504,13 @@ func (b *PlanBuilder) filterSkipColumnTypes(origin []*model.ColumnInfo, tbl *ast func (b *PlanBuilder) buildAnalyzeFullSamplingTask( as *ast.AnalyzeTableStmt, - tasks []AnalyzeColumnsTask, + analyzePlan *Analyze, physicalIDs []int64, partitionNames []string, tbl *ast.TableName, version int, persistOpts bool, - rsOptionsMap map[int64]V2AnalyzeOptions, -) ([]AnalyzeColumnsTask, error) { +) error { // Version 2 doesn't support incremental analyze. // And incremental analyze will be deprecated in the future. if as.Incremental { @@ -2517,12 +2519,12 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask( astOpts, err := handleAnalyzeOptionsV2(as.AnalyzeOpts) if err != nil { - return nil, err + return err } // Get all column info which need to be analyzed. astColList, err := getAnalyzeColumnList(as.ColumnNames, tbl) if err != nil { - return nil, err + return err } var predicateCols, mustAnalyzedCols calcOnceMap @@ -2533,15 +2535,15 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask( astColsInfo, _, err := b.getFullAnalyzeColumnsInfo(tbl, as.ColumnChoice, astColList, &predicateCols, &mustAnalyzedCols, mustAllColumns, true) if err != nil { - return nil, err + return err } isAnalyzeTable := len(as.PartitionNames) == 0 optionsMap, colsInfoMap, err := b.genV2AnalyzeOptions(persistOpts, tbl, isAnalyzeTable, physicalIDs, astOpts, as.ColumnChoice, astColList, &predicateCols, &mustAnalyzedCols, mustAllColumns) if err != nil { - return nil, err + return err } for physicalID, opts := range optionsMap { - rsOptionsMap[physicalID] = opts + analyzePlan.OptionsMap[physicalID] = opts } // Build tasks for each partition. 
@@ -2567,7 +2569,7 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask( } execColsInfo = b.filterSkipColumnTypes(execColsInfo, tbl, &mustAnalyzedCols) allColumns := len(tbl.TableInfo.Columns) == len(execColsInfo) - indexes := getModifiedIndexesInfoForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo) + indexes, independentIndexes := getModifiedIndexesInfoForAnalyze(tbl.TableInfo, allColumns, execColsInfo) handleCols := BuildHandleColsForAnalyze(b.ctx, tbl.TableInfo, allColumns, execColsInfo) newTask := AnalyzeColumnsTask{ HandleCols: handleCols, @@ -2582,10 +2584,18 @@ func (b *PlanBuilder) buildAnalyzeFullSamplingTask( newTask.ColsInfo = append(newTask.ColsInfo, extraCol) newTask.HandleCols = &IntHandleCols{col: colInfoToColumn(extraCol, len(newTask.ColsInfo)-1)} } - tasks = append(tasks, newTask) + analyzePlan.ColTasks = append(analyzePlan.ColTasks, newTask) + for _, indexInfo := range independentIndexes { + newIdxTask := AnalyzeIndexTask{ + IndexInfo: indexInfo, + TblInfo: tbl.TableInfo, + AnalyzeInfo: info, + } + analyzePlan.IdxTasks = append(analyzePlan.IdxTasks, newIdxTask) + } } - return tasks, nil + return nil } func (b *PlanBuilder) genV2AnalyzeOptions( @@ -2791,7 +2801,7 @@ func (b *PlanBuilder) buildAnalyzeTable(as *ast.AnalyzeTableStmt, opts map[ast.A } var commonHandleInfo *model.IndexInfo if version == statistics.Version2 { - p.ColTasks, err = b.buildAnalyzeFullSamplingTask(as, p.ColTasks, physicalIDs, partitionNames, tbl, version, usePersistedOptions, p.OptionsMap) + err = b.buildAnalyzeFullSamplingTask(as, p, physicalIDs, partitionNames, tbl, version, usePersistedOptions) if err != nil { return nil, err } diff --git a/planner/core/stats.go b/planner/core/stats.go index 19da203c027f7..db2488c183d4e 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -254,7 +254,7 @@ func (ds *DataSource) initStats(colGroups [][]*expression.Column) { tableStats := &property.StatsInfo{ RowCount: float64(ds.statisticTable.RealtimeCount), ColNDVs: make(map[int64]float64, ds.schema.Len()), - HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.tableInfo, ds.schema.Columns), + HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.tableInfo, ds.TblCols), StatsVersion: ds.statisticTable.Version, } if ds.statisticTable.Pseudo { diff --git a/statistics/BUILD.bazel b/statistics/BUILD.bazel index f6977b6922722..aed693bb9b77a 100644 --- a/statistics/BUILD.bazel +++ b/statistics/BUILD.bazel @@ -77,7 +77,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, - shard_count = 41, + shard_count = 42, deps = [ "//config", "//parser/ast", diff --git a/statistics/analyze.go b/statistics/analyze.go index 61cfe55474563..3abfd9ce989c0 100644 --- a/statistics/analyze.go +++ b/statistics/analyze.go @@ -83,4 +83,20 @@ type AnalyzeResults struct { BaseCount int64 // BaseModifyCnt is the original modify_count in mysql.stats_meta at the beginning of analyze. BaseModifyCnt int64 + // For multi-valued index analyze, there are some very different behaviors, so we add this field to indicate it. + // + // Analyze result of multi-valued index come from an independent v2 analyze index task (AnalyzeIndexExec), and it's + // done by a scan on the index data and building stats. According to the original design rational of v2 stats, we + // should use the same samples to build stats for all columns/indexes. 
We created an exceptional case here to avoid + // loading the samples of JSON columns to tidb, which may cost too much memory, and we can't handle such case very + // well now. + // + // As the definition of multi-valued index, the row count and NDV of this index may be higher than the table row + // count. So we can't use this result to update the table-level row count. + // The snapshot field is used by v2 analyze to check if there are concurrent analyze, so we also can't update it. + // The multi-valued index analyze task is always together with another normal v2 analyze table task, which will + // take care of those table-level fields. + // In conclusion, when saving the analyze result for mv index, we need to store the index stats, as for the + // table-level fields, we only need to update the version. + ForMVIndex bool } diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index 3096d6e9a28de..b2d2a27b687f6 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -1111,18 +1111,48 @@ func SaveTableStatsToStorage(sctx sessionctx.Context, results *statistics.Analyz if len(rows) > 0 { snapshot := rows[0].GetUint64(0) // A newer version analyze result has been written, so skip this writing. - if snapshot >= results.Snapshot && results.StatsVer == statistics.Version2 { + // For multi-valued index analyze, this check is not needed because we expect there's another normal v2 analyze + // table task that may update the snapshot in stats_meta table (that task may finish before or after this task). + if snapshot >= results.Snapshot && results.StatsVer == statistics.Version2 && !results.ForMVIndex { return nil } curCnt = int64(rows[0].GetUint64(1)) curModifyCnt = rows[0].GetInt64(2) } + if len(rows) == 0 || results.StatsVer != statistics.Version2 { - if _, err = exec.ExecuteInternal(ctx, "replace into mysql.stats_meta (version, table_id, count, snapshot) values (%?, %?, %?, %?)", version, tableID, results.Count, results.Snapshot); err != nil { + // 1-1. + // a. There's no existing records we can update, we must insert a new row. Or + // b. it's stats v1. + // In these cases, we use REPLACE INTO to directly insert/update the version, count and snapshot. + snapShot := results.Snapshot + count := results.Count + if results.ForMVIndex { + snapShot = 0 + count = 0 + } + if _, err = exec.ExecuteInternal(ctx, + "replace into mysql.stats_meta (version, table_id, count, snapshot) values (%?, %?, %?, %?)", + version, + tableID, + count, + snapShot, + ); err != nil { return err } statsVer = version + } else if results.ForMVIndex { + // 1-2. There's already an existing record for this table, and we are handling stats for mv index now. + // In this case, we only update the version. See comments for AnalyzeResults.ForMVIndex for more details. + if _, err = exec.ExecuteInternal(ctx, + "update mysql.stats_meta set version=%? where table_id=%?", + version, + tableID, + ); err != nil { + return err + } } else { + // 1-3. There's already an existing records for this table, and we are handling a normal v2 analyze. modifyCnt := curModifyCnt - results.BaseModifyCnt if modifyCnt < 0 { modifyCnt = 0 @@ -1154,7 +1184,14 @@ func SaveTableStatsToStorage(sctx sessionctx.Context, results *statistics.Analyz zap.Int64("results.Count", results.Count), zap.Int64("count", cnt)) } - if _, err = exec.ExecuteInternal(ctx, "update mysql.stats_meta set version=%?, modify_count=%?, count=%?, snapshot=%? 
where table_id=%?", version, modifyCnt, cnt, results.Snapshot, tableID); err != nil { + if _, err = exec.ExecuteInternal(ctx, + "update mysql.stats_meta set version=%?, modify_count=%?, count=%?, snapshot=%? where table_id=%?", + version, + modifyCnt, + cnt, + results.Snapshot, + tableID, + ); err != nil { return err } statsVer = version diff --git a/statistics/histogram.go b/statistics/histogram.go index 8ed870dfdec48..0476bc858d691 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -348,6 +348,45 @@ func (hg *Histogram) RemoveVals(valCntPairs []TopNMeta) { } } +// StandardizeForV2AnalyzeIndex fixes some "irregular" places in the Histogram, which come from current implementation of +// analyze index task in v2. +// For now, it does two things: 1. Remove empty buckets. 2. Reset Bucket.NDV to 0. +func (hg *Histogram) StandardizeForV2AnalyzeIndex() { + if hg == nil || len(hg.Buckets) == 0 { + return + } + // Note that hg.Buckets is []Bucket instead of []*Bucket, so we avoid extra memory allocation for the struct Bucket + // in the process below. + + // remainedBktIdxs are the positions of the eventually remained buckets in the original hg.Buckets slice. + remainedBktIdxs := make([]int, 0, len(hg.Buckets)) + // We use two pointers here. + // checkingIdx is the "fast" one, and it iterates the hg.Buckets and check if they are empty one by one. + // When we find a non-empty bucket, we move it to the position where nextRemainedBktIdx, which is the "slow" + // pointer, points to. + nextRemainedBktIdx := 0 + for checkingIdx := range hg.Buckets { + if hg.BucketCount(checkingIdx) <= 0 && hg.Buckets[checkingIdx].Repeat <= 0 { + continue + } + remainedBktIdxs = append(remainedBktIdxs, checkingIdx) + if nextRemainedBktIdx != checkingIdx { + hg.Buckets[nextRemainedBktIdx] = hg.Buckets[checkingIdx] + } + hg.Buckets[nextRemainedBktIdx].NDV = 0 + nextRemainedBktIdx++ + } + hg.Buckets = hg.Buckets[:nextRemainedBktIdx] + + // Get the new Bounds from the original Bounds according to the indexes we collect. + c := chunk.NewChunkWithCapacity([]*types.FieldType{hg.Tp}, len(remainedBktIdxs)) + for _, i := range remainedBktIdxs { + c.AppendDatum(0, hg.GetLower(i)) + c.AppendDatum(0, hg.GetUpper(i)) + } + hg.Bounds = c +} + // AddIdxVals adds the given values to the histogram. func (hg *Histogram) AddIdxVals(idxValCntPairs []TopNMeta) { totalAddCnt := int64(0) diff --git a/statistics/histogram_test.go b/statistics/histogram_test.go index f30b553c9222b..c03f442e97876 100644 --- a/statistics/histogram_test.go +++ b/statistics/histogram_test.go @@ -389,3 +389,146 @@ func TestIndexQueryBytes(t *testing.T) { // Repeat require.Equal(t, idx.QueryBytes(nil, high), uint64(10)) } + +type histogramInputAndOutput struct { + inputHist *Histogram + inputHistToStr string + outputHistToStr string +} + +func TestStandardizeForV2AnalyzeIndex(t *testing.T) { + // 1. 
prepare expected input and output histograms (in string) + testData := []*histogramInputAndOutput{ + { + inputHistToStr: "index:0 ndv:6\n" + + "num: 0 lower_bound: 111 upper_bound: 111 repeats: 0 ndv: 0\n" + + "num: 0 lower_bound: 123 upper_bound: 123 repeats: 0 ndv: 0\n" + + "num: 10 lower_bound: 34567 upper_bound: 5 repeats: 3 ndv: 2", + outputHistToStr: "index:0 ndv:6\n" + + "num: 10 lower_bound: 34567 upper_bound: 5 repeats: 3 ndv: 0", + }, + { + inputHistToStr: "index:0 ndv:6\n" + + "num: 0 lower_bound: 111 upper_bound: 111 repeats: 0 ndv: 0\n" + + "num: 0 lower_bound: 123 upper_bound: 123 repeats: 0 ndv: 0\n" + + "num: 0 lower_bound: 34567 upper_bound: 5 repeats: 0 ndv: 0", + outputHistToStr: "index:0 ndv:6", + }, + { + inputHistToStr: "index:0 ndv:6\n" + + "num: 10 lower_bound: 34567 upper_bound: 5 repeats: 3 ndv: 2\n" + + "num: 0 lower_bound: 876 upper_bound: 876 repeats: 0 ndv: 0\n" + + "num: 0 lower_bound: 990 upper_bound: 990 repeats: 0 ndv: 0", + outputHistToStr: "index:0 ndv:6\n" + + "num: 10 lower_bound: 34567 upper_bound: 5 repeats: 3 ndv: 0", + }, + { + inputHistToStr: "index:0 ndv:6\n" + + "num: 10 lower_bound: 111 upper_bound: 111 repeats: 10 ndv: 1\n" + + "num: 12 lower_bound: 123 upper_bound: 34567 repeats: 4 ndv: 20\n" + + "num: 10 lower_bound: 5 upper_bound: 990 repeats: 6 ndv: 2", + outputHistToStr: "index:0 ndv:6\n" + + "num: 10 lower_bound: 111 upper_bound: 111 repeats: 10 ndv: 0\n" + + "num: 12 lower_bound: 123 upper_bound: 34567 repeats: 4 ndv: 0\n" + + "num: 10 lower_bound: 5 upper_bound: 990 repeats: 6 ndv: 0", + }, + { + inputHistToStr: "index:0 ndv:6\n" + + "num: 0 lower_bound: 111 upper_bound: 111 repeats: 0 ndv: 0\n" + + "num: 0 lower_bound: 123 upper_bound: 123 repeats: 0 ndv: 0\n" + + "num: 10 lower_bound: 34567 upper_bound: 34567 repeats: 3 ndv: 2\n" + + "num: 0 lower_bound: 5 upper_bound: 5 repeats: 0 ndv: 0\n" + + "num: 0 lower_bound: 876 upper_bound: 876 repeats: 0 ndv: 0\n" + + "num: 10 lower_bound: 990 upper_bound: 990 repeats: 3 ndv: 2\n" + + "num: 10 lower_bound: 95 upper_bound: 95 repeats: 3 ndv: 2", + outputHistToStr: "index:0 ndv:6\n" + + "num: 10 lower_bound: 34567 upper_bound: 34567 repeats: 3 ndv: 0\n" + + "num: 10 lower_bound: 990 upper_bound: 990 repeats: 3 ndv: 0\n" + + "num: 10 lower_bound: 95 upper_bound: 95 repeats: 3 ndv: 0", + }, + { + inputHistToStr: "index:0 ndv:6\n" + + "num: 0 lower_bound: 111 upper_bound: 111 repeats: 0 ndv: 0\n" + + "num: 0 lower_bound: 123 upper_bound: 123 repeats: 0 ndv: 0\n" + + "num: 10 lower_bound: 34567 upper_bound: 34567 repeats: 3 ndv: 2\n" + + "num: 0 lower_bound: 5 upper_bound: 5 repeats: 0 ndv: 0\n" + + "num: 10 lower_bound: 876 upper_bound: 876 repeats: 3 ndv: 2\n" + + "num: 10 lower_bound: 990 upper_bound: 990 repeats: 3 ndv: 2\n" + + "num: 0 lower_bound: 95 upper_bound: 95 repeats: 0 ndv: 0", + outputHistToStr: "index:0 ndv:6\n" + + "num: 10 lower_bound: 34567 upper_bound: 34567 repeats: 3 ndv: 0\n" + + "num: 10 lower_bound: 876 upper_bound: 876 repeats: 3 ndv: 0\n" + + "num: 10 lower_bound: 990 upper_bound: 990 repeats: 3 ndv: 0", + }, + } + // 2. 
prepare the actual Histogram input + ctx := mock.NewContext() + sc := ctx.GetSessionVars().StmtCtx + val0, err := codec.EncodeKey(sc, nil, types.NewIntDatum(111)) + require.NoError(t, err) + val1, err := codec.EncodeKey(sc, nil, types.NewIntDatum(123)) + require.NoError(t, err) + val2, err := codec.EncodeKey(sc, nil, types.NewIntDatum(34567)) + require.NoError(t, err) + val3, err := codec.EncodeKey(sc, nil, types.NewIntDatum(5)) + require.NoError(t, err) + val4, err := codec.EncodeKey(sc, nil, types.NewIntDatum(876)) + require.NoError(t, err) + val5, err := codec.EncodeKey(sc, nil, types.NewIntDatum(990)) + require.NoError(t, err) + val6, err := codec.EncodeKey(sc, nil, types.NewIntDatum(95)) + require.NoError(t, err) + val0Bytes := types.NewBytesDatum(val0) + val1Bytes := types.NewBytesDatum(val1) + val2Bytes := types.NewBytesDatum(val2) + val3Bytes := types.NewBytesDatum(val3) + val4Bytes := types.NewBytesDatum(val4) + val5Bytes := types.NewBytesDatum(val5) + val6Bytes := types.NewBytesDatum(val6) + hist0 := NewHistogram(0, 6, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0) + hist0.AppendBucketWithNDV(&val0Bytes, &val0Bytes, 0, 0, 0) + hist0.AppendBucketWithNDV(&val1Bytes, &val1Bytes, 0, 0, 0) + hist0.AppendBucketWithNDV(&val2Bytes, &val3Bytes, 10, 3, 2) + testData[0].inputHist = hist0 + hist1 := NewHistogram(0, 6, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0) + hist1.AppendBucketWithNDV(&val0Bytes, &val0Bytes, 0, 0, 0) + hist1.AppendBucketWithNDV(&val1Bytes, &val1Bytes, 0, 0, 0) + hist1.AppendBucketWithNDV(&val2Bytes, &val3Bytes, 0, 0, 0) + testData[1].inputHist = hist1 + hist2 := NewHistogram(0, 6, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0) + hist2.AppendBucketWithNDV(&val2Bytes, &val3Bytes, 10, 3, 2) + hist2.AppendBucketWithNDV(&val4Bytes, &val4Bytes, 10, 0, 0) + hist2.AppendBucketWithNDV(&val5Bytes, &val5Bytes, 10, 0, 0) + testData[2].inputHist = hist2 + hist3 := NewHistogram(0, 6, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0) + hist3.AppendBucketWithNDV(&val0Bytes, &val0Bytes, 10, 10, 1) + hist3.AppendBucketWithNDV(&val1Bytes, &val2Bytes, 22, 4, 20) + hist3.AppendBucketWithNDV(&val3Bytes, &val5Bytes, 32, 6, 2) + testData[3].inputHist = hist3 + hist4 := NewHistogram(0, 6, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0) + hist4.AppendBucketWithNDV(&val0Bytes, &val0Bytes, 0, 0, 0) + hist4.AppendBucketWithNDV(&val1Bytes, &val1Bytes, 0, 0, 0) + hist4.AppendBucketWithNDV(&val2Bytes, &val2Bytes, 10, 3, 2) + hist4.AppendBucketWithNDV(&val3Bytes, &val3Bytes, 10, 0, 0) + hist4.AppendBucketWithNDV(&val4Bytes, &val4Bytes, 10, 0, 0) + hist4.AppendBucketWithNDV(&val5Bytes, &val5Bytes, 20, 3, 2) + hist4.AppendBucketWithNDV(&val6Bytes, &val6Bytes, 30, 3, 2) + testData[4].inputHist = hist4 + hist5 := NewHistogram(0, 6, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0) + hist5.AppendBucketWithNDV(&val0Bytes, &val0Bytes, 0, 0, 0) + hist5.AppendBucketWithNDV(&val1Bytes, &val1Bytes, 0, 0, 0) + hist5.AppendBucketWithNDV(&val2Bytes, &val2Bytes, 10, 3, 2) + hist5.AppendBucketWithNDV(&val3Bytes, &val3Bytes, 10, 0, 0) + hist5.AppendBucketWithNDV(&val4Bytes, &val4Bytes, 20, 3, 2) + hist5.AppendBucketWithNDV(&val5Bytes, &val5Bytes, 30, 3, 2) + hist5.AppendBucketWithNDV(&val6Bytes, &val6Bytes, 30, 0, 0) + testData[5].inputHist = hist5 + + // 3. 
the actual test + for i, test := range testData { + require.Equal(t, test.inputHistToStr, test.inputHist.ToString(1)) + test.inputHist.StandardizeForV2AnalyzeIndex() + require.Equal(t, test.outputHistToStr, test.inputHist.ToString(1), + fmt.Sprintf("testData[%d].inputHist:%s", i, test.inputHistToStr)) + } +} diff --git a/statistics/table.go b/statistics/table.go index 97e7ab71e30ee..6e9bb620addd6 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -409,7 +409,13 @@ func (coll *HistColl) GetAnalyzeRowCount() float64 { slices.Sort(ids) for _, id := range ids { idx := coll.Indices[id] - if idx != nil && idx.IsFullLoad() { + if idx == nil { + continue + } + if idx.Info != nil && idx.Info.MVIndex { + continue + } + if idx.IsFullLoad() { return idx.TotalRowCount() } }
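
Note on the StandardizeForV2AnalyzeIndex change above: it keeps hg.Buckets as a value slice ([]Bucket) and compacts it in place with two indexes rather than building a filtered copy, so no extra allocation is needed for the surviving buckets. Below is a minimal, standalone sketch of that two-pointer compaction pattern only; the Bucket struct, the per-bucket count field, and the emptiness test are simplified stand-ins, not the actual statistics package types, which additionally rebuild the bounds chunk from the kept indexes.

package main

import "fmt"

// Bucket is a simplified stand-in for the histogram bucket in the patch; the real
// statistics.Bucket stores cumulative counts and keeps its bounds in a chunk.Chunk,
// both of which this sketch omits.
type Bucket struct {
	Count  int64 // per-bucket row count (simplified; the real field is cumulative)
	Repeat int64
	NDV    int64
}

// compactBuckets drops "empty" buckets in place using two indexes: i scans every
// bucket, next marks where the next surviving bucket is written. Because the slice
// holds Bucket values rather than pointers, moving survivors forward reuses the
// existing backing array. It also resets per-bucket NDV to 0, as the v2 analyze
// index path does, and returns the kept positions so a caller could rebuild any
// parallel structure (e.g. the bounds).
func compactBuckets(buckets []Bucket) ([]Bucket, []int) {
	kept := make([]int, 0, len(buckets))
	next := 0
	for i := range buckets {
		if buckets[i].Count <= 0 && buckets[i].Repeat <= 0 {
			continue // skip empty bucket
		}
		kept = append(kept, i)
		if next != i {
			buckets[next] = buckets[i]
		}
		buckets[next].NDV = 0
		next++
	}
	return buckets[:next], kept
}

func main() {
	bkts := []Bucket{{0, 0, 0}, {10, 3, 2}, {0, 0, 0}, {7, 1, 1}}
	out, kept := compactBuckets(bkts)
	fmt.Println(out, kept) // [{10 3 0} {7 1 0}] [1 3]
}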