有时无法使用Oracle自带的OEM进行监控,这时就需要另行确定一个监控方案。本方案使用Python+Prometheus+Grafana对Oracle进行监控。
1.监控配置表
-- Create table
-- Monitoring configuration table: one row per monitored item.
-- MONITOR_SQL holds the query text for the item and STATUS switches the item
-- on (1) or off (0).
create table DB_MONITOR_TAB
(
  ID                  NUMBER,
  MONITOR_CLASS       VARCHAR2(50),
  MONITOR_NAME        VARCHAR2(50) not null,
  MONITOR_SQL         VARCHAR2(1000),
  CREATED_TIME        DATE default sysdate,
  -- Numeric literal: the previous default '0' was a character literal and
  -- relied on implicit VARCHAR2 -> NUMBER conversion.
  STATUS              NUMBER default 0,
  MONITOR_INSTRUCTION VARCHAR2(100)
)
tablespace DBADMIN
  pctfree 10
  initrans 1
  maxtrans 255
  storage
  (
    initial 64K
    next 1M
    minextents 1
    maxextents unlimited
  );
-- Add comments to the table
comment on table DB_MONITOR_TAB
  is 'Oracle监控配置表';
-- Add comments to the columns (one statement per column of DB_MONITOR_TAB)
comment on column DB_MONITOR_TAB.ID
  is '序号';
comment on column DB_MONITOR_TAB.MONITOR_CLASS
  is '监控类别';
comment on column DB_MONITOR_TAB.MONITOR_NAME
  is '监控事项';
comment on column DB_MONITOR_TAB.MONITOR_SQL
  is '监控事项的SQL';
comment on column DB_MONITOR_TAB.CREATED_TIME
  is '监控事项创建时间';
comment on column DB_MONITOR_TAB.STATUS
  is '监控事项是否开启 1:开启,0:不开启';
-- MONITOR_INSTRUCTION was the only column left without a comment.
comment on column DB_MONITOR_TAB.MONITOR_INSTRUCTION
  is '监控事项说明';
-- Create/Recreate primary, unique and foreign key constraints
-- MONITOR_NAME is the primary key, so every monitored item needs a unique name.
alter table DB_MONITOR_TAB
  add constraint PK_MONITOR_NAME primary key (MONITOR_NAME)
  using index
  tablespace DBADMIN
  pctfree 10
  initrans 2
  maxtrans 255
  storage
  (
    initial 64K
    next 1M
    minextents 1
    maxextents unlimited
  );

2.插入监控数据
-- The stored query contains single quotes of its own, so it is written with
-- Oracle's alternative quoting (q'[...]'); inside a plain '...' literal those
-- quotes end the string early and the insert fails to parse.
-- The explicit column list keeps the insert independent of column order.
insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (1, 'session', 'v_session', q'[select initcap(s.TYPE)||' '||initcap(s.STATUS) status,count(s.STATUS) from v$session s group by  s.TYPE,s.STATUS order by status]', sysdate, 1, '当前session个数');
-- Seed rows for the monitoring configuration table.
-- Every stored query is wrapped in Oracle's alternative quoting (q'[...]')
-- because most of them contain single quotes of their own; inside a plain
-- '...' literal those quotes end the string early and the insert fails to
-- parse. The query text itself is unchanged.
-- Each statement names its columns explicitly and ends with ';' so the script
-- can be run as-is.
insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (10, 'archive_log_info', 'v_archive_log_info', q'[select 'Archive_Log_Size' || '(MB)',case when trunc(sum(s.BLOCKS * s.BLOCK_SIZE)/1024/1024, 2) is null then 0 else trunc(sum(s.BLOCKS * s.BLOCK_SIZE)/1024/1024,2) end from v$archived_log s where s.DEST_ID = 1 and s.COMPLETION_TIME >= trunc(sysdate) and s.COMPLETION_TIME < trunc(sysdate + 1)]', sysdate, 1, '每天归档日志大小(MB)');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (11, 'archive_log_info', 'v_archive_log_count', q'[select 'Archive_Log_Count',count(1) from v$archived_log s where s.DEST_ID = 1 and s.COMPLETION_TIME >= trunc(sysdate) and s.COMPLETION_TIME < trunc(sysdate + 1)]', sysdate, 1, '每天归档日志数量');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (12, 'lock_info', 'v_locke_count', q'[select 'Lock_Count', count(1) from v$session s where s.STATUS='ACTIVE' AND s.LOCKWAIT is not null and s.BLOCKING_SESSION_STATUS='VALID']', sysdate, 1, '行锁数量');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (13, 'redo_info', 'v_redo_info', q'[SELECT initcap(NAME), VALUE FROM V$SYSSTAT where NAME in  ('redo entries','redo buffer allocation retries')]', sysdate, 1, 'redo情况');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (14, 'net_info', 'v_net_info', q'[SELECT initcap(NAME)||'(KB)',round(VALUE/1024,2) FROM V$SYSSTAT where NAME in ('bytes received via SQL*Net from client','bytes sent via SQL*Net to client')]', sysdate, 1, '数据库数据进出的网络流量');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (2, 'process', 'v_process', q'[select 'Process_Count',count(1) from v$process]', sysdate, 1, '当前process个数');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (3, 'tablespace_used', 'v_tablespace_used', q'[SELECT x.tablespace_name||'(%)',x.USED_RATE FROM (select * from (select a.TABLESPACE_NAME TABLESPACE_NAME,ROUND((a.total-b.free_space)/a.total*100,2) as USED_RATE from (select TABLESPACE_NAME,sum(bytes/1024/1024) total from dba_data_files group by TABLESPACE_NAME) a,(select TABLESPACE_NAME,sum(bytes/1024/1024) free_space from dba_free_space group by tablespace_name) b where a.TABLESPACE_NAME=b.TABLESPACE_NAME) UNION all select * from (select c.TABLESPACE_NAME TABLESPACE_NAME,ROUND((c.total-d.free_space)/c.total*100,2) as USED_RATE from (select TABLESPACE_NAME,sum(bytes/1024/1024) total from dba_temp_files group by TABLESPACE_NAME) c,(select TABLESPACE_NAME,sum(FREE_SPACE/1024/1024) free_space from dba_temp_free_space group by tablespace_name)d where c.TABLESPACE_NAME=d.TABLESPACE_NAME)) X order by USED_RATE desc]', sysdate, 1, '各个表空间使用情况');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (4, 'wait_class', 'v_wait_class', q'[SELECT s.WAIT_CLASS,sum(s.TIME_WAITED/1000) FROM v$system_event s group by s.WAIT_CLASS order by s.WAIT_CLASS]', sysdate, 1, '数据库各类型等待事件统计');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (5, 'sga_info', 'v_shared_pool', q'[select sg.NAME||'(GB)',trunc(sg.BYTES/1024/1024/1024,2) from v$sgainfo sg where sg.NAME in ('Buffer Cache Size','Shared Pool Size') order by sg.name]', sysdate, 1, 'shared_pool 内存大小');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (6, 'sga_info', 'v_shared_pool_free_size', q'[SELECT 'shared_pool_free'||'(GB)',trunc(sp.BYTES/1024/1024/1024,2) FROM V$SGASTAT sp WHERE NAME = 'free memory' and pool='shared pool']', sysdate, 1, 'shared_pool 空闲内存大小');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (7, 'hits_info', 'v_library_cache_hits', q'[select 'Library_Cache_Hits'||'(%)',trunc(sum(ly.PINHITS)/sum(ly.PINS)*100,2) from v$librarycache ly]', sysdate, 1, 'library_cache_hits');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (8, 'hits_info', 'v_buffer_pool_cache_hits', q'[SELECT 'Buffer_Pool_Cache_Hits'||'(%)',(trunc(1-(PHYSICAL_READS/(DB_BLOCK_GETS+CONSISTENT_GETS)),2))*100 FROM V$BUFFER_POOL_STATISTICS WHERE NAME='DEFAULT']', sysdate, 1, 'buffer_pool_cache_hits');

insert into db_monitor_tab (id, monitor_class, monitor_name, monitor_sql, created_time, status, monitor_instruction)
values (9, 'hits_info', 'v_sql_hard_prse_hit', q'[select 'Hard_Parse_Hits'||'(%)',trunc((select st.VALUE from v$sysstat st  where st.NAME='parse count (hard)')/(select st.VALUE from v$sysstat st  where st.NAME='parse count (total)') *100,2) from dual]', sysdate, 1, 'sql硬解析的百分比');

-- The monitoring program reads this table over its own connection, which can
-- only see committed rows.
commit;

3.Python监控程序
cat oracle_status_output.py
from prometheus_client import Gauge,start_http_server
import random
import subprocess
import time
import cx_Oracle
class Oracle_Status_Output:
    """Run a single SQL statement against the monitored Oracle database.

    Every call to ``oracle_status_select`` opens its own connection, executes
    ``db_sql`` and returns all rows of the result.
    """

    def __init__(self, db_sql, db_name='dbadmin', db_password='QazWsx12',
                 db_tns='localhost:1521/paydb'):
        """Store the statement and the connection settings.

        Args:
            db_sql: SQL text to execute.
            db_name: database user name.
            db_password: password of ``db_name``.
            db_tns: connect string passed to ``cx_Oracle.connect``.
        """
        # NOTE(review): the defaults keep the credentials that used to be
        # hard-coded here so existing callers keep working; real deployments
        # should pass them in from configuration instead of source code.
        self.db_name = db_name
        self.db_password = db_password
        self.db_tns = db_tns
        self.db_sql = db_sql

    def oracle_status_select(self):
        """Execute ``self.db_sql`` and return the rows from ``fetchall()``.

        Returns:
            The list of result rows, or ``None`` when connecting or executing
            fails (the error is printed, not raised).
        """
        db = None
        try:
            db = cx_Oracle.connect(self.db_name, self.db_password, self.db_tns)
            cursor = db.cursor()
            cursor.execute(self.db_sql)
            return cursor.fetchall()
        except Exception as e:
            print(e)
            return None
        finally:
            # Close the connection on the failure path too; it used to be
            # closed only after a successful fetch.
            if db is not None:
                try:
                    db.close()
                except Exception as e:
                    print(e)


# Selects the SQL text of every enabled monitoring item.
_MONITOR_CONFIG_SQL = "select monitor_sql from db_monitor_tab where status=1 and monitor_sql is not null"


def _publish_rows(gauge, rows):
    """Set the gauge child labelled by each row's first column to its second column."""
    for row in rows:
        gauge.labels(mylabelname=row[0]).set(row[1])


def _collect_once(gauge):
    """Run every enabled monitoring query once and publish its rows to ``gauge``."""
    monitor_rows = Oracle_Status_Output(_MONITOR_CONFIG_SQL).oracle_status_select()
    if monitor_rows is None:
        # The configuration could not be read; the error was already printed.
        return
    for monitor_row in monitor_rows:
        # Each query is handled on its own so that one failing item does not
        # prevent the remaining items from being collected in this cycle.
        try:
            rows = Oracle_Status_Output(monitor_row[0]).oracle_status_select()
            if rows is None:
                continue
            _publish_rows(gauge, rows)
        except Exception as e:
            print(e)


if __name__ == "__main__":
    # Expose the metrics over HTTP on port 9800.
    start_http_server(9800)
    oracleGauge = Gauge('oracleGauge','Description of gauge', ['mylabelname'])
    while True:
        try:
            time.sleep(1)
            _collect_once(oracleGauge)
        except Exception as e:
            print(e)
扫码关注我们
微信号:SRE实战
拒绝背锅 运筹帷幄