?
There is an index on Column B.
Statistics are gathered without Histograms using:
?
exec DBMS_STATS.GATHER_TABLE_STATS (NULL,'HTAB1', method_opt => 'FOR ALL COLUMNS SIZE 1');
?
Setup:
?
drop table HTAB1;
create table HTAB1 (a number, b number);
Insert into HTAB1 ( A,B) values ( 1,1);
Insert into HTAB1 ( A,B) values ( 2,2);
Insert into HTAB1 ( A,B) values ( 3,3);
Insert into HTAB1 ( A,B) values ( 4,4);
Insert into HTAB1 ( A,B) values ( 9996,9996);
Insert into HTAB1 ( A,B) values ( 9997,9997);
Insert into HTAB1 ( A,B) values ( 9998,9998);
Insert into HTAB1 ( A,B) values ( 9999,9999);
Insert into HTAB1 ( A,B) values ( 10000,10000);
commit;
begin
for i in 5 .. 9995 loop
Insert into HTAB1 ( A,B)
values ( i,5);
if (mod(i,100) = 0) then
commit;
end if;
end loop;
commit;
end;
/
commit;
create index HTAB1_B on HTAB1(b);
exec DBMS_STATS.GATHER_TABLE_STATS (NULL,'HTAB1', method_opt => 'FOR ALL COLUMNS SIZE 1');
alter session set OPTIMIZER_DYNAMIC_SAMPLING = 0;
?
Function to convert raw data in to numeric data:
create or replace function raw_to_number(my_input raw)
return number
as
my_output number;
begin
dbms_stats.convert_raw_value(my_input,my_output);
return my_output;
end;
/
This results in statistics as follows:
?
column COLUMN_NAME format a5 heading COL
column NUM_DISTINCT format 99990
column LOW_VALUE format 99990
column HIGH_VALUE format 99990
column DENSITY format 99990
column NUM_NULLS format 99990
column NUM_BUCKETS format 99990
column SAMPLE_SIZE format 99990
select COLUMN_NAME,NUM_DISTINCT,raw_to_number(LOW_VALUE) Low,raw_to_number(HIGH_VALUE) High,DENSITY,NUM_NULLS,
NUM_BUCKETS,LAST_ANALYZED,SAMPLE_SIZE,HISTOGRAM
from user_tab_columns
where table_name = 'HTAB1';
COL NUM_DISTINCT LOW HIGH DENSITY NUM_NULLS NUM_BUCKETS LAST_ANALYZED SAMPLE_SIZE HISTOGRAM
----- ------------ ---------- ---------- ------- --------- ----------- -------------------- ----------- ---------------
A 10000 1 10000 0 0 1 31-jan-2013 09:32:08 10000 NONE
B 10 1 10000 0 0 1 31-jan-2013 09:32:08 10000 NONE
select lpad(TABLE_NAME,10) TAB, lpad(COLUMN_NAME, 10) COL,
ENDPOINT_NUMBER, ENDPOINT_VALUE
from user_histograms
where table_name='HTAB1'
order by COL, ENDPOINT_NUMBER;
TAB COL ENDPOINT_NUMBER ENDPOINT_VALUE
---------- ---------- --------------- --------------
HTAB1 A 0 1
HTAB1 A 1 10000
HTAB1 B 0 1
HTAB1 B 1 10000
?
In the above you can see that the statistics gathering has not created a histogram. There is a single bucket and high and a low ENDPOINT_NUMBER for each column value ( you will always get 2 entries in USER_HISTOGRAMS for each column, for the high and low values respectively).
?
Test queries:
select * from htab1 where b=5;
select * from htab1 where b=3;
To replicate the tests you will need to disable OPTIMIZER_DYNAMIC_SAMPLING
?
alter session set OPTIMIZER_DYNAMIC_SAMPLING = 0;
?
See:
Document 336267.1 Optimizer Dynamic Sampling (OPTIMIZER_DYNAMIC_SAMPLING)Without Histograms, both queries do an INDEX RANGE SCAN because the optimizer believes that the data is uniformly distributed in column B and that each predicate with return 1/10th of the values because there are 10 distinct values:
?
------------------------------------------------------------------------------