[20200401]优化的困惑5.txt

--//春节前对一个生产系统做优化,完成后使用ash_wait_chains.sql检查,发现control file parallel write有点多。
--//当时并没有在意,总感觉哪里不对,感觉这套系统磁盘IO有问题,现在有空分析看看。


1.环境:
> @ ver1
PORT_STRING                    VERSION        BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx            11.2.0.3.0     Oracle Database 11g Enterprise Edition Release 11.2.0.3.0 - 64bit Production

2.问题提出:
> @ tpt/ash/dash_wait_chains event2 1=1 trunc(sysdate-2) trunc(sysdate-1)
%This     SECONDS     AAS WAIT_CHAIN
------ ---------- ------- ---------------------------------------------
  59%        2790      .0 -> control file parallel write
  21%         980      .0 -> ON CPU
  13%         640      .0 -> log file parallel write
   4%         170      .0 -> db file async I/O submit
   1%          70      .0 -> log file sync  -> log file parallel write
   1%          60      .0 -> db file sequential read
   0%          20      .0 -> LNS wait on SENDREQ
   0%          10      .0 -> os thread startup
   0%          10      .0 -> ADR block file read
9 rows selected.
--//trunc(sysdate-2) trunc(sysdate-1) 范围在2020/3/29 2020/3/30 之间,是星期天.
--//很明显这是一套根本不忙的系统(我已经做了许多优化),当然的时间区间也很大,不过还是可以看出服务器不忙。
--//同时很不理解为什么control file parallel write能达到2790秒。

3.分析:
# lsof /u01/app/oracle/oradata/xxxyyy/control01.ctl
COMMAND   PID   USER   FD   TYPE DEVICE SIZE/OFF     NODE NAME
oracle   3827 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle   3829 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle   3831 oracle  256uW  REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
oracle   3837 oracle  257u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle   3861 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle   3995 oracle  257u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle   4112 oracle  257u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle  10221 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle  18633 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle  18688 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle  18732 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle  23969 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
oracle  24082 oracle  256u   REG    8,3 11812864 35061901 /u01/app/oracle/oradata/xxxyyy/control01.ctl
--//仅仅进程3831有写。

# ps -ef | grep 383[1]
oracle    3831     1  0  2016 ?        04:13:39 ora_ckpt_xxxyyy
--//正是ckpt进程。

SYS@xxxyyy> show parameter filesystem
NAME                 TYPE   VALUE
-------------------- ------ -------
filesystemio_options string ASYNCH
--//打开了磁盘异步IO。

$ ls -l /proc/3831/fd  | grep control0
lrwx------ 1 oracle oinstall 64 2020-03-31 09:18:42 256 -> /u01/app/oracle/oradata/xxxyyy/control01.ctl
lrwx------ 1 oracle oinstall 64 2020-03-31 09:18:42 257 -> /u01/app/oracle/oradata/xxxyyy/control02.ctl
--//控制文件对应文件句柄是256,257。

$ strace -f -e io_submit -Ttt  -p  3831
Process 3831 attached - interrupt to quit
09:24:52.522886 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.022249>
09:24:53.545944 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.015565>
09:24:54.561196 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.016591>
09:24:55.579054 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.015069>
09:24:56.594928 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.015453>
09:24:57.612264 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.014407>
09:24:58.627982 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.023324>
09:24:59.652000 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.032261>
09:25:00.685230 io_submit(47787382317056, 2, {{0x2b765df0c210, 0, 1, 0, 256}, {0x2b765df0c460, 0, 1, 0, 257}}) = 2 <0.081960>
Process 3831 detached
--//你可以发现每秒都调用一次io_submit写入,时间间隔大约1秒1次,实际上就是写增量检查点,数据库很空闲。

0.022249+0.015565+0.016591+0.015069+0.015453+0.014407+0.023324+0.032261+0.081960 = .236879
.236879/9 = .02632,平均占.02632秒。
--//如果换算成毫秒的话,相当于26ms.这是一个相当差劲的IO,我严重怀疑BIOS没有打开写回功能(write back),而是设置在写通模式
--//(write through)上.

--//相当于一天调用 86400 /(1+.02632) = 84184次。需要84184*.02632 = 2215.72288秒,当然这是io_submit的时间,并非control file
--//parallel write.非常接近上面看到control file parallel write=2790秒。

--//再加大取样量看看。
# strace -f -e io_submit -Ttt  -p  3831  -o/tmp/aaa
Process 3831 attached - interrupt to quit
Process 3831 detached

# cat /tmp/aaa | awk '{print $17}' | tr -d "<>"  | awk '{j++;i=i+$NF}END{print j,i,i/j}'
42 1.04516 0.0248849

--//取样42次,0.0248849秒相当于25ms。相当慢的磁盘IO。

4.如何确定磁盘在write back还是write Through状态呢?
--//方法一:首先想到的进入BIOS检查,当然目前不行。
--//方法二:dmesg,没有发现相关信息。
--//方法三:我知道dell随机光盘里面有一个使用程序可以通过浏览器查看硬件配置的程序。这个比较麻烦,机器已经太老了。相关资料
--//放在哪里也不知道。
--//方法四:hdparm,看了一下man文档也没有发现相关设置与检查。
# hdparm 2>&1 | grep -i write
 --Istdout write identify data to stdout as ASCII hex
 -n   get/set ignore-write-errors flag (0/1)
 -W   set drive write-caching flag (0/1) (DANGEROUS)
--//看man hdparm,感觉这个命令很危险DANGEROUS,放弃。
--//方法五:想想linux还有什么命令了解硬件配置呢,马上想到dmidecode。

# dmidecode | grep -i write
        Operational Mode: Write Through
        Operational Mode: Write Through
        Operational Mode: Write Back
        Operational Mode: Write Through
        Operational Mode: Write Through
        Operational Mode: Write Back
--//很明显了安装人员没有很好的设置磁盘操作模式。

# dmidecode > /tmp/aaa1
# dmidecode -t 7
# dmidecode 2.11
SMBIOS 2.7 present.

Handle 0x0700, DMI type 7, 19 bytes
Cache Information
        Socket Designation: Not Specified
        Configuration: Enabled, Not Socketed, Level 1
        Operational Mode: Write Through
        Location: Internal
        Installed Size: 128 kB
        Maximum Size: 128 kB
        Supported SRAM Types:
                Unknown
        Installed SRAM Type: Unknown
        Speed: Unknown
        Error Correction Type: Single-bit ECC
        System Type: Data
        Associativity: 8-way Set-associative

Handle 0x0701, DMI type 7, 19 bytes
Cache Information
        Socket Designation: Not Specified
        Configuration: Enabled, Not Socketed, Level 2
        Operational Mode: Write Through
        Location: Internal
        Installed Size: 1024 kB
        Maximum Size: 1024 kB
        Supported SRAM Types:
                Unknown
        Installed SRAM Type: Unknown
        Speed: Unknown
        Error Correction Type: Single-bit ECC
        System Type: Unified
        Associativity: 8-way Set-associative

Handle 0x0702, DMI type 7, 19 bytes
Cache Information
        Socket Designation: Not Specified
        Configuration: Enabled, Not Socketed, Level 3
        Operational Mode: Write Back
        Location: Internal
        Installed Size: 10240 kB
        Maximum Size: 10240 kB
        Supported SRAM Types:
                Unknown
        Installed SRAM Type: Unknown
        Speed: Unknown
        Error Correction Type: Single-bit ECC
        System Type: Unified
        Associativity: <OUT OF SPEC>

Handle 0x0703, DMI type 7, 19 bytes
Cache Information
        Socket Designation: Not Specified
        Configuration: Enabled, Not Socketed, Level 1
        Operational Mode: Write Through
        Location: Internal
        Installed Size: 0 kB
        Maximum Size: 0 kB
        Supported SRAM Types:
                Unknown
        Installed SRAM Type: Unknown
        Speed: Unknown
        Error Correction Type: Unknown
        System Type: Data
        Associativity: Unknown

Handle 0x0704, DMI type 7, 19 bytes
Cache Information
        Socket Designation: Not Specified
        Configuration: Enabled, Not Socketed, Level 2
        Operational Mode: Write Through
        Location: Internal
        Installed Size: 0 kB
        Maximum Size: 0 kB
        Supported SRAM Types:
                Unknown
        Installed SRAM Type: Unknown
        Speed: Unknown
        Error Correction Type: Unknown
        System Type: Unified
        Associativity: Unknown

Handle 0x0705, DMI type 7, 19 bytes
Cache Information
        Socket Designation: Not Specified
        Configuration: Enabled, Not Socketed, Level 3
        Operational Mode: Write Back
        Location: Internal
        Installed Size: 0 kB
        Maximum Size: 0 kB
        Supported SRAM Types:
                Unknown
        Installed SRAM Type: Unknown
        Speed: Unknown
        Error Correction Type: Unknown
        System Type: Unified
        Associativity: Unknown

# dmidecode -t 7 | egrep -i "write|Installed Size"
        Operational Mode: Write Through
        Installed Size: 128 kB
        Operational Mode: Write Through
        Installed Size: 1024 kB
        Operational Mode: Write Back
        Installed Size: 10240 kB
        Operational Mode: Write Through
        Installed Size: 0 kB
        Operational Mode: Write Through
        Installed Size: 0 kB
        Operational Mode: Write Back
        Installed Size: 0 kB

--//找了一台相似的机器检查发现:
# dmidecode | grep -i write
        Operational Mode: Write Back
        Operational Mode: Write Back
        Operational Mode: Write Back
        Operational Mode: Write Back
        Operational Mode: Write Back
        Operational Mode: Write Back

--//基本可以验证我的推断,安装OS的人员没有很好的设置BIOS,设置在Write Through模式导致写入磁盘IO有点慢。
--//要么还有一种可能就是板上可能有电池,已经没电了,无法设置在write back模式。仅仅是我的推断。

5.顺便测试read看看:
--//session 1:
> @ spid
         SID      SERIAL# PROCESS                  SERVER    SPID       PID    P_SERIAL# C50
------------ ------------ ------------------------ --------- ------ ------- ------------ --------------------------------------------------
         843        60253 40936                    DEDICATED 4912       126           56 alter system kill session '843,60253' immediate;

> @ check
                                                                                                                                                                                                           检查点队列
                                                                                                                                                                                     当前时间                脏块数量
low_rba              on_disk_rba          on_disk_rba_scn( on_disk_rba_time(CP full checkpoint_rba   full_checkpoint( full_checkpoint_tim diff_scn(on_disk_rdb-ch_scn) current_group SYSDATE                    CPDRT
-------------------- -------------------- ---------------- ------------------- --------------------- ---------------- ------------------- ---------------------------- ------------- ------------------- ------------
12898.27952.0        12898.31085.0        30787894340      2020-04-02 09:45:35 12898.2.16            30787875482      2020-04-02 08:53:27                        18858             1 2020-04-02 09:45:36          486

--//session 2:
# strace -x -f -e pread,io_getevents,io_submit -Ttt  -p  4912
Process 4912 attached - interrupt to quit
09:45:36.707530 pread(257, "\x15\xc2\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x04\x15\x4c\x00\x00\x00\x00\x00\x00\x00\x00\x20\x0b\x56\x37\x09\x0f"..., 16384, 16384) = 16384 <0.000024>
09:45:36.707785 pread(257, "\x15\xc2\x00\x00\x0f\x00\x00\x00\xd7\x59\x19\x00\xff\xff\x01\x04\xa8\x3c\x00\x00\x00\x47\x00\x00\x00\x00\x00\x00\x00\x00\x00\x45"..., 16384, 245760) = 16384 <0.000018>
09:45:36.707888 pread(257, "\x15\xc2\x00\x00\x11\x00\x00\x00\xd7\x59\x19\x00\xff\xff\x01\x04\xc4\x34\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xd6\x6a\xdc\x33"..., 16384, 278528) = 16384 <0.000018>
09:45:36.707985 pread(257, "\x15\xc2\x00\x00\x14\x00\x00\x00\xc8\x59\x19\x00\xff\xff\x01\x04\x2e\x77\x00\x00\x0f\x00\x00\x00\x9a\xb2\x19\x2b\x07\x00\x1c\x0a"..., 16384, 327680) = 16384 <0.000017>
09:45:36.708108 pread(257, "\x15\xc2\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x04\x5b\x9a\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\xe6\x01\x00\x00"..., 16384, 49152) = 16384 <0.000019>
Process 4912 detached

--//异步IO读取使用pread函数吗,数据库没有使用asm原因?
--//参考链接http://blog.itpub.net/267265/viewspace-2222208/,执行如下:

> @ viewsess 'physical read total'
NAME                                     STATISTIC#        VALUE          SID
---------------------------------------- ---------- ------------ ------------
physical read total IO requests                  48          273          843
physical read total multi block requests         49            6          843
physical read total bytes                        52      6833152          843

> @ check
                                                                                                                                                                                                           检查点队列
                                                                                                                                                                                     当前时间                脏块数量
low_rba              on_disk_rba          on_disk_rba_scn( on_disk_rba_time(CP full checkpoint_rba   full_checkpoint( full_checkpoint_tim diff_scn(on_disk_rdb-ch_scn) current_group SYSDATE                    CPDRT
-------------------- -------------------- ---------------- ------------------- --------------------- ---------------- ------------------- ---------------------------- ------------- ------------------- ------------
12898.34936.0        12898.36928.0        30787898313      2020-04-02 09:58:32 12898.2.16            30787875482      2020-04-02 08:53:27                        22831             1 2020-04-02 09:58:33          332

> @ viewsess 'physical read total'
NAME                                     STATISTIC#        VALUE          SID
---------------------------------------- ---------- ------------ ------------
physical read total IO requests                  48          278          843
physical read total multi block requests         49            6          843
physical read total bytes                        52      6915072          843

--//上下比较,可以发现physical read total IO requests增加5次(278-273=5),physical read total bytes增加 6915072-6833152=81920,正好等于16384*5= 81920.
--//你可以发现读16384字节 每次pread的时间很小。

总结:
--//再次提醒自己注意一些细节。数据库上线前给仔细检查,实际上还是我提到的分工问题,如果节点上的人做好自己的工作,
--//链接:http://blog.itpub.net/267265/viewspace-2102914/ => [20160519]浅谈行业分工.txt ,这台机器就是当年的这台服务器。
--//实际上如果当时我没有再仔细看,这个问题就给划过了。
--//还有一点要说明的是通过awr报表很容易划过这个问题。

Top 5 Timed Foreground Events

Event                            Waits    Time(s)    Avg wait (ms)  % DB time  Wait Class
DB CPU                           265                                73.26
log file sync                    8,377         98    12             27.09      Commit
db file sequential read          6,670         15    2              4.28       User I/O
enq: KO - fast object checkpoint 151           7     47             1.95       Application
SQL*Net more data to client      176,457       3     0              0.82       Network
--//负载太轻了。不在前台等待事件。

Background Wait Events

    ordered by wait time desc, waits desc (idle events last)
    Only events with Total Wait Time (s) >= .001 are shown
    %Timeouts: value of 0 indicates value was < .5%. Value of null is truly 0

Event                           Waits  %Time -outs  Total Wait Time (s)  Avg wait (ms)   Waits /txn      % bg time
log file parallel write         9,963            0                 112             11          1.16      37.05
control file parallel write     3,610            0                 106             29          0.42      35.23
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
db file async I/O submit        877              0                  51             59          0.10      17.08
os thread startup               37               0                   1             25          0.00      0.30
LNS wait on SENDREQ             9,935            0                   1             0           1.16      0.17
--//注意看 Avg wait (ms) ,只能讲自己功力还是不够,没有仔细看这些细节,主要精力放在前台事件的优化上了,忽略后后台事件。
--//磁盘IO存在问题。

--//附上测试脚本:
$ cat check.sql
column "full checkpoint_rba" format a21
column low_rba format a20
column low_rba16 format a20
column on_disk_rba format a20
column on_disk_rba16 format a20
column rtckp_rba format a20
column diff_date format 9999999.99
rem column CPOSD_ono_disk_rba_scn format 99999999999999999999999999999999
column cpdrt heading "检查点队列|脏块数量|CPDRT"
column cpodt_on_disk_rba heading "检查点队列|on disk rba|时间戳|CPODT"
column cpods heading "检查点队列|on disk rba scn|CPODS"
column cphbt heading "检查点心跳|CPHBT"
column current_sysdate heading "当前时间|SYSDATE"
set num 12

SELECT b.cplrba_seq || '.' || b.cplrba_bno || '.' || b.cplrba_bof "low_rba"
      ,b.cpodr_seq || '.' || b.cpodr_bno || '.' || b.cpodr_bof "on_disk_rba"
      ,b.CPODS "on_disk_rba_scn(CPODS)"
      ,TO_DATE (b.CPODT, 'MM-DD-YYYY HH24:MI:SS') "on_disk_rba_time(CPODT)"
      ,a.rtckp_rba_seq || '.' || a.rtckp_rba_bno || '.' || a.rtckp_rba_bof
          "full checkpoint_rba"
      ,a.rtckp_scn "full_checkpoint(rtckp_scn)"
      ,TO_DATE (a.rtckp_tim, 'MM-DD-YYYY HH24:MI:SS')
          "full_checkpoint_time_rtckp_tim"
      ,b.CPODS - a.rtckp_scn "diff_scn(on_disk_rdb-ch_scn)"
      ,a.rtcln "current_group"
      ,sysdate current_sysdate
          ,CPDRT
  FROM x$kccrt a, x$kcccp b
 WHERE a.rtnum = b.cptno AND A.INST_ID = b.inst_id;

$ cat viewsess.sql
set verify off
column name format a70
SELECT b.NAME, a.statistic#, a.VALUE,a.sid
  FROM v$mystat a, v$statname b
 WHERE lower(b.NAME) like lower('%&1%') AND a.statistic# = b.statistic#
 and a.value>0;

 [20200401]优化的困惑5.txt Oracle
扫码关注我们
微信号:SRE实战
拒绝背锅 运筹帷幄

SRE实战 互联网时代守护先锋,助力企业售后服务体系运筹帷幄!一键直达领取阿里云限量特价优惠。