SAS宏分析Log找出最费时间的步骤

我经常遇到的问题是:想从成百上千个DATA步和过程步中找出花费时间最长的步骤(以便对程序进行优化),但SAS输出的Log文件可读性不强,很难方便地从里面找到我要的信息。
我写的这个宏是把Log文件里每一个步骤的名称、所在Log文件的行号、所花的实际时间和CPU时间提取出来,方便分析。也可以对日志进行分析找出错误信息和警告信息,这个我们留到以后再说。
Log文件大致是下面这个样子:
NOTE: Deleting WORK.SS_TEMP (memtype=DATA).
NOTE: PROCEDURE DELETE used (Total process time):
      real time           0.00 seconds
      cpu time            0.01 seconds


NOTE: The data set WORK.SS_TEMP has 1 observations and 1 variables.
NOTE: DATA statement used (Total process time):
      real time           0.00 seconds
      cpu time            0.00 seconds


NOTE: There were 1 observations read from the data set WORK.SS_TEMP.
NOTE: The data set WORK.SS_TEMP has 1 observations and 2 variables.
NOTE: DATA statement used (Total process time):
      real time           0.00 seconds
      cpu time            0.00 seconds


NOTE: PROCEDURE SQL used (Total process time):
      real time           0.00 seconds
      cpu time            0.00 seconds
      
此宏的作用是读取SAS系统生成的Log文件,并将得到的步骤名称和对应的运行时间保存至结果表格中。其中,InFilePath是Log文件的完整路径(从根目录一直写到文件名,后缀必须为txt或log);TargetTable是结果表格的名称。最终得到的结果包含原始Log文件中每个步骤的名称和对应的运行时间(实际时间和CPU时间),其中N是该步骤在原始Log文件中所在的行号。
/**********************************************************************/
/* GetTimeTableForSASLog                                              */
/*                                                                    */
/* Purpose:                                                           */
/*   Read a SAS log file and build a table with one row per DATA step */
/*   or PROCEDURE step found in it, together with the elapsed (real)  */
/*   time and the CPU time reported for that step.                    */
/*                                                                    */
/* Parameters:                                                        */
/*   InFilePath  - full path of the log file; the extension must be   */
/*                 TXT or LOG (case-insensitive).                     */
/*   TargetTable - name of the output data set (replaced if present). */
/*                                                                    */
/* Output columns:                                                    */
/*   N         - line number of the step's "NOTE: ... used" header    */
/*               in the log file.                                     */
/*   Name      - step name, e.g. "PROCEDURE SQL" or "DATA statement". */
/*   Real_Time - elapsed time as a SAS time value (format TIME10.4).  */
/*   CPU_Time  - CPU time as a SAS time value (format TIME10.4).      */
/*                                                                    */
/* On invalid arguments an ERROR line is written to the log and the   */
/* macro exits without creating TargetTable.                          */
/**********************************************************************/
%macro GetTimeTableForSASLog(InFilePath,TargetTable);
  %local FileExt;

  /* Validate InFilePath: the extension must be TXT or LOG.           */
  /* %superq/%qscan/%qupcase keep characters such as "/" , "-" or "," */
  /* in the path from being interpreted as operators by %IF.          */
  %let FileExt=;
  %if %index(%superq(InFilePath),%str(.)) GT 0 %then
    %let FileExt=%qupcase(%qscan(%superq(InFilePath),-1,%str(.)));

  %if "&FileExt" NE "TXT" AND "&FileExt" NE "LOG" %then
    %do;
      %put ERROR: The suffix of InFilePath should be txt or log, please check it again.;
      %goto exit;
    %end;

  /* Fail early with a clear message when the file is not there. */
  %if %sysfunc(fileexist(%superq(InFilePath))) EQ 0 %then
    %do;
      %put ERROR: The file specified by InFilePath does not exist, please check it again.;
      %goto exit;
    %end;

  /* Validate TargetTable: it must not be blank. */
  %if %length(&TargetTable) EQ 0 %then
    %do;
      %put ERROR: The TargetTable should not be blank, please check it again.;
      %goto exit;
    %end;

  /* Parse the log in a single pass.                                   */
  /* The file is read with INFILE/INPUT instead of PROC IMPORT because */
  /* PROC IMPORT cannot infer a DBMS from a .log extension and sizes   */
  /* the text column from the first rows only.                         */
  data &TargetTable(keep=N Name Real_Time CPU_Time);
    /* Declared first so the output columns keep this order. */
    length N 8 Name $100 Real_Time CPU_Time 8;
    length Line LineUp $256 Token $40;
    format Real_Time CPU_Time TIME10.4;

    /* Values of the step currently being collected survive across lines. */
    retain N Name Real_Time CPU_Time;
    retain StepOpen 0;

    infile "&InFilePath" truncover lrecl=32767 end=LastLine;
    input;

    /* Timing lines are indented in the log; compare on left-aligned text. */
    Line=left(_infile_);
    LineUp=upcase(Line);

    if LineUp =: 'NOTE:' then
      do;
        /* A new NOTE ends the timing block of the step in progress. */
        if StepOpen then output;
        StepOpen=0;

        /* Only "NOTE: DATA ... used" / "NOTE: PROCEDURE ... used" lines  */
        /* start a step. Other "... used:" notes (SAS initialization, The */
        /* SAS System) leave StepOpen at 0 so their timings are ignored.  */
        UsedPos=find(LineUp,' USED');
        if (LineUp =: 'NOTE: DATA' or LineUp =: 'NOTE: PROCEDURE') and UsedPos GT 7 then
          do;
            N=_N_;
            /* Text between "NOTE: " and " used". */
            Name=substr(Line,7,UsedPos-7);
            Real_Time=.;
            CPU_Time=.;
            StepOpen=1;
          end;
      end;
    else if StepOpen and (LineUp =: 'REAL TIME' or LineUp =: 'CPU TIME') then
      do;
        link ParseTime;
        if LineUp =: 'REAL TIME' then Real_Time=Seconds;
        else CPU_Time=Seconds;
      end;

    /* Write the last step when the file ends right after its timings. */
    if LastLine and StepOpen then output;
    return;

  /* Convert the first time token on the line into seconds.          */
  /* Accepted forms: "s.ss seconds", "m:ss.ss" and "h:mm:ss.ss".     */
  /* Seconds stays missing when the token cannot be interpreted.     */
  ParseTime:
    Seconds=.;
    DigitPos=anydigit(Line);
    if DigitPos GT 0 then
      do;
        Token=scan(substr(Line,DigitPos),1,' ');
        select (countc(Token,':'));
          when (0) Seconds=input(Token,?? 32.);
          when (1) Seconds=hms(0,
                               input(scan(Token,1,':'),?? 32.),
                               input(scan(Token,2,':'),?? 32.));
          when (2) Seconds=hms(input(scan(Token,1,':'),?? 32.),
                               input(scan(Token,2,':'),?? 32.),
                               input(scan(Token,3,':'),?? 32.));
          otherwise;
        end;
      end;
    return;
  run;

%exit:
%mend;

请前往:http://www.mark-to-win.com/TeacherV2.html?id=166