[20210316]MSSM表空间块ITL的LCK 3.txt

[20210316]mssm表空间块itl的lck 3.txt

–//以前的测试,链接:http://blog.itpub.net/267265/viewspace-2564734/=>[20190125]mssm表空间块itl的lck.txt
–//昨天遇到的问题ora-04000 the sum of pctused and pctfree cannot exceed 100,链接:http://blog.itpub.net/267265/viewspace-2762819/
–//想想是否与这个问题相关呢?这个也是mssm与assm的不同的地方,我决定测试看看。

1.环境:
scott@book> @ ver1
port_string                    version        banner
—————————— ————– ——————————————————————————–
x86_64/linux 2.4.xx            11.2.0.4.0     oracle database 11g enterprise edition release 11.2.0.4.0 – 64bit production

create tablespace mssm datafile
  ‘/mnt/ramdisk/book/mssm01.dbf’ size 40m autoextend on next 16m maxsize unlimited
logging
online
extent management local autoallocate
blocksize 8k
segment space management manual
flashback on;

2.测试:

scott@book> create table t tablespace mssm pctfree 99 pctused 1 as select level id, rpad(level, 3500, ‘x’) vc from dual connect by level <= 1;
table created.
–//注:缺省pctfree=10,pctused=40.

scott@book> @ desc t
name  null?    type
—– ——– —————————-
id             number
vc             varchar2(4000)

scott@book> select rowid ,id,substr(vc,1,2) from t;
rowid                      id subs
—————— ———- —-
aaawmkaahaaaacbaaa          1 1x

scott@book> @ rowid aaawmkaahaaaacbaaa
    object       file      block        row rowid_dba            dba                  text
———- ———- ———- ———- ——————– ——————– —————————————-
     90916          7        129          0  0x1c00081           7,129                alter system dump datafile 7 block 129 ;

scott@book> delete from t where id=1;
1 row deleted.

scott@book> commit ;
commit complete.

scott@book> alter system flush buffer_cache ;
system altered.

scott@book>  alter system dump datafile 7 block 129 ;
system altered.

–//检查转储发现:
block header dump:  0x01c00081
 object id on block? y
 seg/obj: 0x16324  csc: 0x03.177d9219  itc: 3  flg: o  typ: 1 – data
     fsl: 2  fnx: 0x0 ver: 0x01

 itl           xid                  uba         flag  lck        scn/fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  c—    0  scn 0x0003.177d9219
0x02   0x000a.00b.00005d86  0x00c00b60.11db.0b  –u-    2  fsc 0x0db3.177d9231
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03   0x0000.000.00000000  0x00000000.0000.00  —-    0  fsc 0x0000.00000000
–//实际上删除1条记录,而itl=0x02的lck记录的是2.这个是我以前遇到的情况。

3.继续测试:
–//drop table t purge;
scott@book> create table t tablespace mssm pctfree 40 pctused 1 as select level id, rpad(‘x’, 2000, ‘x’) vc from dual connect by level <= 4;
table created.

scott@book> @ desc t
name  null?    type
—– ——– —————————-
id             number
vc             varchar2(2000)

scott@book> select rowid ,id,substr(vc,1,2) from t;
rowid                      id subs
—————— ———- —-
aaawmnaahaaaacbaaa          1 xx
aaawmnaahaaaacbaab          2 xx
aaawmnaahaaaaccaaa          3 xx
aaawmnaahaaaaccaab          4 xx

scott@book> delete from t where id=1;
1 row deleted.

scott@book> commit ;
commit complete.

scott@book> alter system flush buffer_cache ;
system altered.

scott@book> alter system dump datafile 7 block 129 ;
system altered.

–//检查转储发现:
block header dump:  0x01c00081
 object id on block? y
 seg/obj: 0x16327  csc: 0x03.177d9893  itc: 3  flg: –  typ: 1 – data
     fsl: 0  fnx: 0x0 ver: 0x01

 itl           xid                  uba         flag  lck        scn/fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  c—    0  scn 0x0003.177d9893
0x02   0x000a.00f.00005d89  0x00c00b6c.11db.0a  –u-    1  fsc 0x07d7.177d98a6
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03   0x0000.000.00000000  0x00000000.0000.00  —-    0  fsc 0x0000.00000000
–//注意看下划线lck=1,而不是前面的2.

scott@book> delete from t where id=2;
1 row deleted.

scott@book> commit ;
commit complete.

scott@book> alter system flush buffer_cache ;
system altered.

scott@book> alter system dump datafile 7 block 129 ;
system altered.

–//检查转储发现:
block header dump:  0x01c00081
 object id on block? y
 seg/obj: 0x16327  csc: 0x03.177d9893  itc: 3  flg: o  typ: 1 – data
     fsl: 3  fnx: 0x0 ver: 0x01

 itl           xid                  uba         flag  lck        scn/fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  c—    0  scn 0x0003.177d9893
0x02   0x000a.00f.00005d89  0x00c00b6c.11db.0a  –u-    1  fsc 0x07d7.177d98a6
0x03   0x000a.011.00005cd6  0x00c00b6b.11db.1e  –u-    2  fsc 0x07d7.177d9977
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
–//可以发现这次删除是对应事务槽的lck=2.也就是多产生的lck与pctuser的大小相关。
–//当我删除1条记录时,还没有满足pctused=1的情况,而再删除记录时,该块已经没有记录,一定这样满足pctused=1的情况。
–//这样标识该块可以再次投入使用,一些细节我不是很清楚,这样多记录一个事务,从哪里反映出来,我还是不清楚。

4.可以通过如下验证我的推断:
–//drop table t purge;
scott@book> create table t tablespace mssm pctfree 40 pctused 60 as select level id, rpad(‘x’, 2000, ‘x’) vc from dual connect by level <= 4;
table created.
–//注:pctfree=40,pctused=60. 这样即使我删除1条记录也满足这个触发条件(pctused=60).

scott@book> select rowid ,id,substr(vc,1,2) from t;
rowid                      id subs
—————— ———- —-
aaawmoaahaaaacbaaa          1 xx
aaawmoaahaaaacbaab          2 xx
aaawmoaahaaaaccaaa          3 xx
aaawmoaahaaaaccaab          4 xx

scott@book> delete from t where id=2;
1 row deleted.

scott@book> @ xid
xidusn_xidslot_xidsqn
——————————
6.11.2062

scott@book> commit ;
commit complete.

scott@book> alter system flush buffer_cache ;
system altered.

scott@book> alter system dump datafile 7 block 129 ;
system altered.

–//检查转储发现:
block header dump:  0x01c00081
 object id on block? y
 seg/obj: 0x16328  csc: 0x03.177d9bae  itc: 3  flg: o  typ: 1 – data
     fsl: 2  fnx: 0x0 ver: 0x01

 itl           xid                  uba         flag  lck        scn/fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  c—    0  scn 0x0003.177d9bae
0x02   0x0006.00b.0000080e  0x00c009bb.03c4.14  –u-    2  fsc 0x07d7.177d9c1f
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03   0x0000.000.00000000  0x00000000.0000.00  —-    0  fsc 0x0000.00000000

–//继续删除看看:
scott@book> delete from t where id=1;
1 row deleted.

scott@book> commit ;
commit complete.

scott@book> alter system checkpoint ;
system altered.

scott@book> alter system dump datafile 7 block 129 ;
system altered.

–//检查转储发现:
block header dump:  0x01c00081
 object id on block? y
 seg/obj: 0x16328  csc: 0x03.177d9bae  itc: 3  flg: o  typ: 1 – data
     fsl: 2  fnx: 0x0 ver: 0x01

 itl           xid                  uba         flag  lck        scn/fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  c—    0  scn 0x0003.177d9bae
0x02   0x0006.00b.0000080e  0x00c009bb.03c4.14  –u-    2  fsc 0x07d7.177d9c1f
0x03   0x000a.007.00005d7c  0x00c00b6f.11db.0c  –u-    1  fsc 0x07d7.177d9c92
–//也就是检测1次。这也是我前面测试遇到的情况。

5.测试到这里就可以知道理论控制pctused的大小就可以控制lck产生的数量。
或者这样将如果事务的dml没有触发满足pctused的条件,lck就不会原来事务的基础上增加1。

scott@book> alter table t pctused 10;
table altered.

scott@book> delete from t where id=3;
1 row deleted.

scott@book> @ xid
xidusn_xidslot_xidsqn
——————————
5.2.1925

scott@book> commit ;
commit complete.

scott@book> alter system checkpoint ;
system altered.

scott@book> alter system dump datafile 7 block 130 ;
system altered.

–//检查转储发现:
block header dump:  0x01c00082
 object id on block? y
 seg/obj: 0x16328  csc: 0x03.177d9bae  itc: 3  flg: –  typ: 1 – data
     fsl: 0  fnx: 0x0 ver: 0x01

 itl           xid                  uba         flag  lck        scn/fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  c—    0  scn 0x0003.177d9bae
0x02   0x0005.002.00000785  0x00c000c7.0547.19  –u-    1  fsc 0x07d7.177d9f0c
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03   0x0000.000.00000000  0x00000000.0000.00  —-    0  fsc 0x0000.00000000
–//这也再次验证我的判断,但是有一点我一直没明白,oracle如何知道还有一个事务呢?比如我再删除id=4.
–//这时对应的itl槽lck记录的一定是2.

scott@book> delete from t where id=4;
1 row deleted.

scott@book> commit ;
commit complete.

scott@book> alter system checkpoint ;
system altered.

scott@book> alter system dump datafile 7 block 130 ;
system altered.

–//检查转储发现:
block header dump:  0x01c00082
 object id on block? y
 seg/obj: 0x16328  csc: 0x03.177d9bae  itc: 3  flg: o  typ: 1 – data
     fsl: 3  fnx: 0x0 ver: 0x01

 itl           xid                  uba         flag  lck        scn/fsc
0x01   0xffff.000.00000000  0x00000000.0000.00  c—    0  scn 0x0003.177d9bae
0x02   0x0005.002.00000785  0x00c000c7.0547.19  –u-    1  fsc 0x07d7.177d9f0c
0x03   0x000a.010.00005d8d  0x00c00b70.11db.0c  –u-    2  fsc 0x07d7.177da008
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

6.如果你通过bbed修改itl槽信息会出现什么情况呢?
bbed> p dba 7,130 ktbbh.ktbbhitl[2]
struct ktbbhitl[2], 24 bytes                @92
   struct ktbitxid, 8 bytes                 @92
      ub2 kxidusn                           @92       0x000a
      ub2 kxidslt                           @94       0x0010
      ub4 kxidsqn                           @96       0x00005d8d
   struct ktbituba, 8 bytes                 @100
      ub4 kubadba                           @100      0x00c00b70
      ub2 kubaseq                           @104      0x11db
      ub1 kubarec                           @106      0x0c
   ub2 ktbitflg                             @108      0x2002 (ktbfupb)
   union _ktbitun, 2 bytes                  @110
      sb2 _ktbitfsc                         @110      2007
      ub2 _ktbitwrp                         @110      0x07d7
   ub4 ktbitbas                             @112      0x177da008

bbed> assign dba 7,130 ktbbh.ktbbhitl[2].ktbitflg=0x2001
ub2 ktbitflg                                @108      0x2001 (ktbfupb)

–//注:相当于修改该itl槽的lck=1.
bbed> sum apply
check value for file 7, block 130:
current = 0xfae4, required = 0xfae4

bbed> verify
dbverify – verification starting
file = /mnt/ramdisk/book/mssm01.dbf
block = 130

block checking: dba = 29360258, block type = ktb-managed data block
data header at 0x1f41e74
kdbchk: xaction header lock count mismatch
        trans=3 ilk=1 nlo=2
block 130 failed with check code 6108

–//你可以发现检查时报错,oracle的检查如何知道lck应该是2呢?在该块中有什么或者标识反映这样的变化呢?
–//难道在段头dba=7,128处。

bbed> p dba 7,128 ktsfs_txn[0]
struct ktsfs_txn[0], 20 bytes               @4176
   ub2 ktsfsflg                             @4176     0x0001 (ktsused)
   struct ktsfsxid, 8 bytes                 @4180
      ub2 kxidusn                           @4180     0x0006
      ub2 kxidslt                           @4182     0x000b
      ub4 kxidsqn                           @4184     0x0000080e
   ub4 ktsfslhd                             @4188     0x01c00081
   ub4 ktsfsltl                             @4192     0x01c00081
–//0x02   0x0006.00b.0000080e  0x00c009bb.03c4.14  –u-    2  fsc 0x07d7.177d9c1f
–//注意:kxidusn,kxidslt,kxidsqn的事务槽记录的一致。

bbed> p dba 7,128 ktsfs_txn[1]
struct ktsfs_txn[1], 20 bytes               @4196
   ub2 ktsfsflg                             @4196     0x0001 (ktsused)
   struct ktsfsxid, 8 bytes                 @4200
      ub2 kxidusn                           @4200     0x000a
      ub2 kxidslt                           @4202     0x0010
      ub4 kxidsqn                           @4204     0x00005d8d
   ub4 ktsfslhd                             @4208     0x01c00082
   ub4 ktsfsltl                             @4212     0x01c00082
–//0x01c00082 = set dba 7,130 = alter system dump datafile 7 block 130 = 29360258
–//0x03   0x000a.010.00005d8d  0x00c00b70.11db.0c  –u-    2  fsc 0x07d7.177da008
–//注意:kxidusn,kxidslt,kxidsqn的事务槽记录的一致。

–//跟踪bbed的执行看看:
$ ps -ef | grep bbe[d]
oracle   55921 20345  0 mar15 pts/5    00:00:00 /usr/local/bin/rlwrap -s 9999 -c -r -i -f /usr/local/share/rlwrap/bbed /u01/app/oracle/product/11.2.0.4/dbhome_1/bin/bbed parfile=bbed.par cmdfile=cmd.par
oracle   55922 55921  0 mar15 pts/10   00:00:00 bbed app/oracle/product/11.2.0.4/dbhome_1/bin/bbed parfile=bbed.par cmdfile=cmd.par

$ ls -l /proc/55922/fd
total 0
lrwx—— 1 oracle oinstall 64 2021-03-16 10:17:51 0 -> /dev/pts/10
lrwx—— 1 oracle oinstall 64 2021-03-16 10:17:51 1 -> /dev/pts/10
lr-x—— 1 oracle oinstall 64 2021-03-16 10:17:51 10 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/rdbms/mesg/bbedus.msb
lrwx—— 1 oracle oinstall 64 2021-03-16 08:42:11 2 -> /dev/pts/10
lr-x—— 1 oracle oinstall 64 2021-03-16 10:17:51 3 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/rdbms/mesg/bbedus.msb
lr-x—— 1 oracle oinstall 64 2021-03-16 10:17:51 4 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/oracore/mesg/lrmus.msb
lr-x—— 1 oracle oinstall 64 2021-03-16 10:17:51 5 -> /home/oracle/bbed/filelist.txt
lrwx—— 1 oracle oinstall 64 2021-03-16 10:17:51 6 -> /home/oracle/bbed/bifile.bbd
l-wx—— 1 oracle oinstall 64 2021-03-16 10:17:51 7 -> /home/oracle/bbed/log.bbd
lr-x—— 1 oracle oinstall 64 2021-03-16 10:17:51 8 -> /home/oracle/bbed/cmd.par
lrwx—— 1 oracle oinstall 64 2021-03-16 10:17:51 9 -> /mnt/ramdisk/book/mssm01.dbf
–//文件句柄9对应/mnt/ramdisk/book/mssm01.dbf。

$ strace  -f -p 55922 -e read,lseek -o /tmp/bbed.txt
process 55922 attached – interrupt to quit
^cprocess 55922 detached

$ egrep “lseek.9” /tmp/bbed.txt
55922 lseek(9, 1064960, seek_set)       = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, seek_set)       = 1065984
55922 lseek(9, 1067008, seek_set)       = 1067008
55922 lseek(9, 1068032, seek_set)       = 1068032
55922 lseek(9, 1069056, seek_set)       = 1069056
55922 lseek(9, 1070080, seek_set)       = 1070080
55922 lseek(9, 1071104, seek_set)       = 1071104
55922 lseek(9, 1072128, seek_set)       = 1072128
55922 lseek(9, 1064960, seek_set)       = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, seek_set)       = 1065984
55922 lseek(9, 1067008, seek_set)       = 1067008
55922 lseek(9, 1068032, seek_set)       = 1068032
55922 lseek(9, 1069056, seek_set)       = 1069056
55922 lseek(9, 1070080, seek_set)       = 1070080
55922 lseek(9, 1071104, seek_set)       = 1071104
55922 lseek(9, 1072128, seek_set)       = 1072128
55922 lseek(9, 1064960, seek_set)       = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, seek_set)       = 1065984
55922 lseek(9, 1067008, seek_set)       = 1067008
55922 lseek(9, 1068032, seek_set)       = 1068032
55922 lseek(9, 1069056, seek_set)       = 1069056
55922 lseek(9, 1070080, seek_set)       = 1070080
55922 lseek(9, 1071104, seek_set)       = 1071104
55922 lseek(9, 1072128, seek_set)       = 1072128
–//130*8192  = 1064960,扫描dba=7,130块3次,并没有扫描dba=7,128,oracle是如何检查的呢?
–//检查n久发现仅仅可能ktbbh.ktbbhflg,ktbbh.ktbbhfsl

bbed> p dba 7,130 ktbbh
struct ktbbh, 96 bytes                      @20
   ub1 ktbbhtyp                             @20       0x01 (kddbtdata)
   union ktbbhsid, 4 bytes                  @24
      ub4 ktbbhsg1                          @24       0x00016328
      ub4 ktbbhod1                          @24       0x00016328
   struct ktbbhcsc, 8 bytes                 @28
      ub4 kscnbas                           @28       0x177d9bae
      ub2 kscnwrp                           @32       0x0003
   sb2 ktbbhict                             @36       3
   ub1 ktbbhflg                             @38       0x03 (ktbfonfl)  –>0 = on the freelist
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   ub1 ktbbhfsl                             @39       0x03             –itl tx freelist slot
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   
   ub4 ktbbhfnx                             @40       0x00000000

bbed> assign dba 7,130 ktbbh.ktbbhfsl=0x0
ub1 ktbbhfsl                                @39       0x00

bbed> assign dba 7,130 ktbbh.ktbbhflg=0x02
ub1 ktbbhflg                                @38       0x02 (none)

bbed> sum apply
check value for file 7, block 130:
current = 0xf9e5, required = 0xf9e5

bbed> verify
dbverify – verification starting
file = /mnt/ramdisk/book/mssm01.dbf
block = 130
–//当然这样修改没必要,仅仅知道还有1个事务在这里.还可以做一个验证:

bbed> assign dba 7,130 ktbbh.ktbbhitl[1].ktbitflg=0x2002
ub2 ktbitflg                                @84       0x2002 (ktbfupb)

bbed> assign dba 7,130 ktbbh.ktbbhflg=0x03
ub1 ktbbhflg                                @38       0x03 (ktbfonfl)

bbed> assign dba 7,130 ktbbh.ktbbhfsl=0x02
ub1 ktbbhfsl                                @39       0x02  
–//指向对应的itl槽(从1开始计数)。

bbed> sum apply
check value for file 7, block 130:
current = 0xfbe7, required = 0xfbe7

bbed> verify
dbverify – verification starting
file = /mnt/ramdisk/book/mssm01.dbf
block = 130

7. 总结:
–//mssm表空间块itl的lck 出现dml不一致的情况,是触发检查pctused引起的,修改了ktbbh.ktbbhfsl,ktbbh.ktbbhflg标识。
–//这与mssm表空间的有pctused属性有关,实际上昨天出现ora-04000时,脑子第一个反应并不是这个问题如何解决,而是以前
–//遇到的这个问题。换一句话将如果当时能仔细思考mssm与assm有什么不同,或许这个问题早知道答案了。
–//itpub 上太缺乏相关人讨论这样的问题,我检索一下我在itpub上发过这个问题,链接:
–//http://www.itpub.net/thread-2108112-1-1.html
–//我们团队内部更加不可能……..
–//又浪费一个上午的时间….从上午8:30-11:30坐的屁股都疼,真心想站起来走一走。

(0)
上一篇 2022年3月21日
下一篇 2022年3月21日

相关推荐