
GATHERING STATS IN MASSIVELY PARALLEL
Written:  JP Vijaykumar  June 8th 2013
Modified: July 8th 2013

--PROCEDURE TO ANALYZE ALL THE TABLES/PARTITIONS IN MASSIVE PARALLEL IN BACKGROUND
set serverout on size 1000000 timing on scan off verify off
declare
v_str char(4):=null;                                       --RANGE OF VALUES YES/NO/NULL/''
v_opt varchar2(100):='FOR ALL INDEXED COLUMNS SIZE AUTO';
--v_opt varchar2(100):='FOR ALL COLUMNS SIZE AUTO';
v_pct number:=30;
v_job number;
v_num number:=0;
v_tab varchar2(30):='GATHER_STATS_LIST_P_JP';
v_own varchar2(30);
begin
for c1 in (select job from user_jobs
           where broken='N' and what like '%dbms_stats.gather_table_stats%') loop
  begin
    dbms_job.remove(c1.job);
  exception when others then dbms_output.put_line(sqlerrm);
  end;
end loop;
begin
  execute immediate 'drop table '||v_tab;
exception when others then dbms_output.put_line('');
end;
execute immediate 'create table '||v_tab||' as select '''||
  'STARTSTARTSTARTSTARTSTARTSTART'||''' table_name, '''||
  'STARTSTARTSTARTSTARTSTARTSTART'||''' partition_name, '''||
  'YES'||''' stale_stats, sysdate run_date from dual';
for c1 in (
select a.table_name,a.partition_name,a.partitioning_type,a.stale_stats,a.size_gb,b.sum_gb,
       case when a.size_gb < 5 then 2
            --when a.size_gb > 1   and a.size_gb < 10  then 2
            when a.size_gb > 5   and a.size_gb < 25  then 4
            --when a.size_gb > 25  and a.size_gb < 50  then 8
            when a.size_gb > 25  and a.size_gb < 100 then 8
            when a.size_gb > 100 and a.size_gb < 200 then 16
            when a.size_gb > 200 then 32 end degree
from (
  select t.table_name,t.partition_name,t.stale_stats,t.partition_position,
         nvl(p.partition_count,1) partition_count,
         nvl(partitioning_type,'NOT PARTITIONED') partitioning_type,
         round(bytes/1024/1024/1024,2) size_gb
  from (select table_name,partition_name,nvl(stale_stats,'YES') stale_stats,
               nvl(partition_position,1) partition_position
        from user_tab_statistics
        where stattype_locked is null               -- TABLES WITH STATS LOCKED EXCLUDED
        and stale_stats like '%'||decode(trim(upper(v_str)),'Y','ALL','YES','ALL','')||'%') t,
       (select table_name,partitioning_type,partition_count,
               case when partitioning_type = 'RANGE' and partition_count > 2
                    then partition_count - 1 else 1 end part_cnt
        from user_part_tables) p,
       user_segments s
  where t.table_name = p.table_name(+)
  and   t.partition_position >= nvl(p.part_cnt,1)
  and   t.table_name = s.segment_name
  and   nvl(t.partition_name,'x') = nvl(s.partition_name,'x')) a,
 (select table_name,sum(size_gb) sum_gb
  from (select t.table_name,t.partition_name,t.stale_stats,t.partition_position,
               nvl(p.partition_count,1) partition_count,
               round(bytes/1024/1024/1024,2) size_gb
        from (select table_name,partition_name,nvl(stale_stats,'YES') stale_stats,
                     nvl(partition_position,1) partition_position
              from user_tab_statistics
              where stale_stats like '%'||decode(trim(upper(v_str)),'Y','ALL','YES','ALL','')||'%') t,
             (select table_name,partitioning_type,partition_count,
                     case when partitioning_type = 'RANGE' and partition_count > 2
                          then partition_count - 1 else 1 end part_cnt
              from user_part_tables) p,
             user_segments s
        where t.table_name = p.table_name(+)
        and   t.partition_position >= nvl(p.part_cnt,1)
        and   t.table_name = s.segment_name
        and   nvl(t.partition_name,'x') = nvl(s.partition_name,'x'))
  group by table_name) b
where a.table_name = b.table_name
and a.table_name not in (select job_name from user_datapump_jobs)                --DATAPUMP JOB NAMES EXCLUDED
and a.table_name not in (select table_name from user_tables where temporary='Y') --TEMPORARY TABLES EXCLUDED
order by sum_gb,size_gb) loop
  begin
    dbms_stats.delete_table_stats(ownname=>v_own,tabname=>c1.table_name);
    --execute immediate 'analyze table '||c1.table_name||' estimate statistics sample 1 rows';
    if (c1.partition_name is null) then
      dbms_job.submit(v_job,'begin dbms_stats.gather_table_stats(ownname=>'''||''||
        ''',tabname=>'''||c1.table_name||''',estimate_percent=>'||v_pct||
        ', cascade=>TRUE,method_opt=>'''||v_opt||''',degree=>'||c1.degree||'); '||
        'insert into '||v_tab||' values('''||c1.table_name||''','''||c1.partition_name||
        ''','''||c1.stale_stats||''',sysdate); end;',next_date=>sysdate);
      commit;
    else
      dbms_job.submit(v_job,'begin dbms_stats.gather_table_stats(ownname=>'''||''||
        ''',tabname=>'''||c1.table_name||''',granularity=>'''||'PARTITION'||
        ''',partname=>'''||c1.partition_name||''',estimate_percent=>'||v_pct||
        ', cascade=>TRUE,method_opt=>'''||v_opt||''',degree=>'||c1.degree||'); '||
        'insert into '||v_tab||' values('''||c1.table_name||''','''||c1.partition_name||
        ''','''||c1.stale_stats||''',sysdate); end;',next_date=>sysdate);
      commit;
    end if;
  exception when others then dbms_output.put_line(v_str||' '||sqlerrm);
  end;
end loop;
execute immediate 'select count(1) from user_jobs where what like '''||
  '%dbms_stats.gather_table_stats%'||''' and broken='''||'N'||''' ' into v_num;
while (v_num <> 0) loop
  begin
    dbms_lock.sleep(15);
    execute immediate 'select count(1) from user_jobs where what like '''||
      '%dbms_stats.gather_table_stats%'||''' and broken='''||'N'||''' ' into v_num;
  exception when others then dbms_output.put_line(sqlerrm);
  end;
end loop;
end;
/
select job,broken from user_jobs where what like '%dbms_stats.gather_table_stats%';

set linesize 120
select table_name,partition_name,stale_stats,start_time,run_date,
       round((run_date-start_time)*1440,2) elapsed_minutes
from GATHER_STATS_LIST_P_JP,
     (select run_date start_time from GATHER_STATS_LIST_P_JP where table_name like 'START%')
order by run_date;

*********************************************************************************************

GATHERING STATS IN MASSIVELY PARALLEL
Written JP Vijaykumar    Date June 8th 2013

With large databases, gathering schema stats is always a challenge. The script starts as
many background jobs as there are tables in the schema. I omitted temporary tables, tables
with locked statistics, and tables created by datapump jobs. The elapsed time of the script
is the time taken to analyze the largest table in the schema. To further tune the script's
run time, experiment with the estimate_percent and degree parameters. I varied degree
across tables depending on their sizes. You need to test the degree setting best suited to
your environment, and test the estimate_percent setting for better results.
In the script, a table is analyzed whole if it is NON-PARTITIONED; if the table is RANGE
partitioned, then only the last two partitions of the table are analyzed; if the table is
partitioned by a NON-RANGE method, then all the partitions of the table are analyzed. This
script does not handle SUB-PARTITIONS or any other advanced partitioning types.
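Both listings lean on one pattern: each dbms_stats call is wrapped in dbms_job.submit with
next_date=>sysdate, so the job queue picks it up at once and runs it in a background
session; the commit is what makes the queued job visible to the job-queue processes. A
minimal sketch of that pattern (the table name EMP and the parameter values here are
placeholders, not taken from the scripts above):

declare
  v_job number;
begin
  --queue one gather as a background job; it starts as soon as a job-queue process is free
  dbms_job.submit(v_job,
    'begin dbms_stats.gather_table_stats(ownname=>user,tabname=>''EMP'','||
    'estimate_percent=>30,cascade=>TRUE,degree=>4); end;',
    next_date=>sysdate);
  commit;   --the job queue only sees the job after commit
end;
/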

Since the procedure may take a long time, I want to know from time to time where my
procedure is and how many tables have been analyzed so far. For this purpose I create a
table called GATHER_STATS_PARALLEL_JP inside the schema and insert a message after each
table is analyzed. You can create a table of your choice inside the schema and insert the
messages. If you don't want any table in the schema, you have to wait for the pl/sql
procedure to display all the messages after the processing completes, which takes a few
hours. At times, after displaying some messages, the procedure may fail with a buffer
overflow error.

REQUIREMENTS:
The schema owner is granted execute privilege on the dbms_job and dbms_lock packages.

CONSTRAINTS:
Since all the tables in the schema are analyzed simultaneously, the load on the db will be
high during the run of the proc. Performance of other jobs running during the stats run
time will be slow.
During my testing in 10g, the jobs failed when degree > 32; this needs further research.

CONVENTIONS:
Customize the shell script and pl/sql procedure as per your environment.
All unbroken dbms_stats jobs currently running are removed (for gathering stats). If need
be, you can modify the pl/sql procedure to remove all broken/unbroken dbms_stats jobs
currently running in the db before starting the dbms_stats jobs in the background.
Getting the schema password dynamically changes from environment to environment.
As dbms_output.put_line cannot display a large number of messages and may fail with a
buffer overflow error, I create a small table during the run of this procedure to capture
the elapsed time for each table's stats gathering. At the end, a report is generated from
this table.
This procedure does not handle tables with locked statistics, tables created by users'
datapump jobs, or temporary tables.
I used a case expression to generate an appropriate number for DEGREE, suited to my
environment; it is based on the number of segments and their total size in GB falling into
each bucket.
As per your requirements, and depending on the size of the db and the maintenance window
available for gathering stats, change the estimate percent for each of the segments to an
appropriate and variable number, generated inside the cursor loop using a case statement,
as sketched below.
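A minimal sketch of that convention, with hypothetical thresholds and percentages (tune
both to your maintenance window; none of these buckets come from the scripts in this
article):

select segment_name,
       round(bytes/1024/1024/1024,2) size_gb,
       case when bytes < 5*1024*1024*1024   then 100  --small segments: read them fully
            when bytes < 100*1024*1024*1024 then 30
            else 10                                   --very large segments: light sample
       end est_pct
from user_segments
where segment_type like 'TABLE%';

Exposed as an est_pct column of the cursor, the job string would then use
'estimate_percent=>'||c1.est_pct||' in place of the fixed v_pct or the hard-coded 30.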

USAGE:
Pls test these scripts in test/dev environments; once you are satisfied with your tests'
results, use them in production dbs at your own discretion.

--Created a wrapper shell script to run my script.
$run_stats.sh
Usage script_name db_name schema_name file_name YES(optional)
$run_stats.sh proddb saketh gather_stats_parallel.sql

$cat run_stats.sh
#!/bin/ksh
File=`basename $0`
if [[ $# -le "2" ]]; then
   echo "Usage script_name instance_name schema_name file_name YES(optional)"
   exit
fi
export DBNAME=`echo $1|awk '{print toupper($1)}'`
export SCHEMA=`echo $2|tr 'A-Z' 'a-z'`
export FILENM=`echo $3|tr 'A-Z' 'a-z'|cut -d"." -f1`
export OPTION=`echo $4|tr 'a-z' 'A-Z'`
if [[ -z $OPTION ]]; then export OPTION=''; fi
export DIRNAM=/apps/oracle/scripts/jp
export LOGNAM=${DIRNAM}/${DBNAME}_${SCHEMA}_${FILENM}.dat
export MAILST=jp_vijaykumar[ @ ]yahoo [ . ]com
cp /dev/null ${LOGNAM}
cd ${DIRNAM}
. ~/.bash_profile > /dev/null 2>&1
export PASSWD=`/apps/oracle/scripts/getpassword $DBNAME $SCHEMA`
echo $DBNAME $SCHEMA $PASSWD ${OPTION}
. /apps/oracle/scripts/setora $DBNAME
date >> ${LOGNAM}
sqlplus -s / >> ${LOGNAM} << EOF1
connect $SCHEMA/$PASSWD
@${DIRNAM}/$FILENM.sql ${OPTION}
quit
EOF1
date >> ${LOGNAM}
if [[ $(cat ${LOGNAM}|grep "ORA-"|wc -l) > "0" ]];then
   export SUBJCT="Errors while executing ${FILENM} in ${DBNAME}"
else
   export SUBJCT="$FILENM executed in $DBNAME $SCHEMA"
fi
echo | mutt -s "${SUBJCT}" -a ${LOGNAM} $MAILST
exit
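To restrict the run to segments whose statistics are stale, pass the optional YES flag;
the wrapper hands it to sqlplus and the pl/sql script below picks it up as &1 (db, schema
and file names as in the example above):

$run_stats.sh proddb saketh gather_stats_parallel.sql YES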

--Pls find the pl/sql script, which spawns a background process for each of the tables in
--the schema to gather stats. Pls modify the script as per your requirements and use it.

set serverout on size 1000000 timing on
declare
v_str char(30):='&1';   --RANGE OF VALUES YES OR NULL
v_job number;
v_num number:=0;
begin
for c1 in (select job from user_jobs          --UNBROKEN DBMS_STATS JOBS ARE REMOVED
           where broken='N' and what like '%dbms_stats.gather_table_stats%') loop
  begin
    dbms_job.remove(c1.job);
  exception when others then dbms_output.put_line(sqlerrm);
  end;
end loop;
begin
  execute immediate 'drop table gather_stats_parallel_jp';
exception when others then dbms_output.put_line('');
end;
execute immediate 'create table gather_stats_parallel_jp as select '''||
  'STARTSTARTSTARTSTARTSTARTSTART'||''' table_name, '''||
  'STARTSTARTSTARTSTARTSTARTSTART'||''' partition_name, '''||
  'YES'||''' stale_stats, sysdate run_date from dual';
for c1 in (
select a.table_name,a.partition_name,a.partitioning_type,a.stale_stats,a.size_gb,b.sum_gb,
       case when a.size_gb < 5 then 2
            --when a.size_gb > 1   and a.size_gb < 10  then 2
            when a.size_gb > 5   and a.size_gb < 25  then 4
            --when a.size_gb > 25  and a.size_gb < 50  then 8
            when a.size_gb > 25  and a.size_gb < 100 then 8
            when a.size_gb > 100 and a.size_gb < 200 then 16
            when a.size_gb > 200 then 32 end degree
from (
  select t.table_name,t.partition_name,t.stale_stats,t.partition_position,
         nvl(p.partition_count,1) partition_count,
         nvl(partitioning_type,'NOT PARTITIONED') partitioning_type,
         round(bytes/1024/1024/1024,2) size_gb
  from (select table_name,partition_name,nvl(stale_stats,'YES') stale_stats,
               nvl(partition_position,1) partition_position
        from user_tab_statistics
        where stattype_locked is null) t,     -- TABLES WITH STATS LOCKED EXCLUDED
       (select table_name,partitioning_type,partition_count,
               case when partitioning_type = 'RANGE' and partition_count > 2
                    then partition_count - 1 else 1 end part_cnt
        from user_part_tables) p,
       user_segments s
  where t.table_name = p.table_name(+)
  and   t.partition_position >= nvl(p.part_cnt,1)
  and   t.table_name = s.segment_name
  and   nvl(t.partition_name,'x') = nvl(s.partition_name,'x')) a,
 (select table_name,sum(size_gb) sum_gb
  from (select t.table_name,t.partition_name,t.stale_stats,t.partition_position,
               nvl(p.partition_count,1) partition_count,
               round(bytes/1024/1024/1024,2) size_gb
        from (select table_name,partition_name,nvl(stale_stats,'YES') stale_stats,
                     nvl(partition_position,1) partition_position
              from user_tab_statistics) t,
             (select table_name,partitioning_type,partition_count,
                     case when partitioning_type = 'RANGE' and partition_count > 2
                          then partition_count - 1 else 1 end part_cnt
              from user_part_tables) p,
             user_segments s
        where t.table_name = p.table_name(+)
        and   t.partition_position >= nvl(p.part_cnt,1)
        and   t.table_name = s.segment_name
        and   nvl(t.partition_name,'x') = nvl(s.partition_name,'x'))
  group by table_name) b
where a.table_name = b.table_name
and a.table_name not in (select job_name from user_datapump_jobs)                --DATAPUMP JOB NAMES EXCLUDED
and a.table_name not in (select table_name from user_tables where temporary='Y') --TEMPORARY TABLES EXCLUDED
order by sum_gb,size_gb) loop
  begin
    if (trim(upper(v_str)) = 'YES') then
      if (trim(upper(v_str)) = trim(upper(c1.stale_stats))) then
        if (c1.partition_name is null) then
          dbms_job.submit(v_job,'begin dbms_stats.gather_table_stats(ownname=>'''||''||
            ''',tabname=>'''||c1.table_name||''',estimate_percent=>30,cascade=>TRUE,'||
            'method_opt=>'''||'FOR ALL INDEXED COLUMNS SIZE AUTO'||''',degree=>'||c1.degree||'); '||
            'insert into gather_stats_parallel_jp values('''||c1.table_name||''','''||
            c1.partition_name||''','''||c1.stale_stats||''',sysdate); end;',next_date=>sysdate);
          commit;
        else
          dbms_job.submit(v_job,'begin dbms_stats.gather_table_stats(ownname=>'''||''||
            ''',tabname=>'''||c1.table_name||''',granularity=>'''||'PARTITION'||
            ''',partname=>'''||c1.partition_name||''',estimate_percent=>30,cascade=>TRUE,'||
            'method_opt=>'''||'FOR ALL INDEXED COLUMNS SIZE AUTO'||''',degree=>'||c1.degree||'); '||
            'insert into gather_stats_parallel_jp values('''||c1.table_name||''','''||
            c1.partition_name||''','''||c1.stale_stats||''',sysdate); end;',next_date=>sysdate);
          commit;
        end if;
      end if;
    else
      if (c1.partition_name is null) then
        dbms_job.submit(v_job,'begin dbms_stats.gather_table_stats(ownname=>'''||''||
          ''',tabname=>'''||c1.table_name||''',estimate_percent=>30,cascade=>TRUE,'||
          'method_opt=>'''||'FOR ALL INDEXED COLUMNS SIZE AUTO'||''',degree=>'||c1.degree||'); '||
          'insert into gather_stats_parallel_jp values('''||c1.table_name||''','''||
          c1.partition_name||''','''||c1.stale_stats||''',sysdate); end;',next_date=>sysdate);
        commit;
      else
        dbms_job.submit(v_job,'begin dbms_stats.gather_table_stats(ownname=>'''||''||
          ''',tabname=>'''||c1.table_name||''',granularity=>'''||'PARTITION'||
          ''',partname=>'''||c1.partition_name||''',estimate_percent=>30,cascade=>TRUE,'||
          'method_opt=>'''||'FOR ALL INDEXED COLUMNS SIZE AUTO'||''',degree=>'||c1.degree||'); '||
          'insert into gather_stats_parallel_jp values('''||c1.table_name||''','''||
          c1.partition_name||''','''||c1.stale_stats||''',sysdate); end;',next_date=>sysdate);
        commit;
      end if;
    end if;
  exception when others then dbms_output.put_line(v_str||' '||sqlerrm);
  end;
end loop;
execute immediate 'select count(1) from user_jobs where what like '''||
  '%dbms_stats.gather_table_stats%'||''' and broken='''||'N'||''' ' into v_num;
while (v_num <> 0) loop
  begin
    dbms_lock.sleep(15);
    execute immediate 'select count(1) from user_jobs where what like '''||
      '%dbms_stats.gather_table_stats%'||''' and broken='''||'N'||''' ' into v_num;
  exception when others then dbms_output.put_line(sqlerrm);
  end;
end loop;
end;
/
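As noted under REQUIREMENTS, the schema owner needs execute privilege on both packages the
script calls; a minimal sketch of those grants, run as a DBA (saketh is the schema name
from the usage example above):

grant execute on sys.dbms_job  to saketh;
grant execute on sys.dbms_lock to saketh;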

set linesize 120
select table_name,partition_name,stale_stats,start_time,run_date,
       round((run_date-start_time)*1440,2) elapsed_minutes
from gather_stats_parallel_jp,
     (select run_date start_time from gather_stats_parallel_jp where table_name like 'START%')
order by run_date;

References:
http://docs.oracle.com/cd/E23903_01/doc/doc.41/e21674/advnc_plsql.htm
http://www.scribd.com/doc/49619357/Gather-Table-Statistics-In-Parallel
http://www.scribd.com/doc/47825085/Where-Is-My-Proc
