网站建设项目网络图,微信运营模式,网页游戏排行榜梦幻西游,装修设计效果图网站运用import过程进行SAS数据导入完全实用教程1 单个规范格式文件导入。对单个文件进行导入是我们遇到最多的情况#xff0c;主要有以下几种#xff1a;1.1 对指定分隔符(’|’#xff0c;’’#xff0c;’!’#xff0c;’ab’等)数据的导入#xff0c;这里以’!’为例de…运用import过程进行SAS数据导入完全实用教程1 单个规范格式文件导入。对单个文件进行导入是我们遇到最多的情况主要有以下几种1.1 对指定分隔符(’|’’’’!’’ab’等)数据的导入这里以’!’为例delimiter!进行说明data _null_;file c:\temp\pipefile.txt;putX1!X2!X3!X4;put 11!22!.! ;put 111!.!333!apple;run;导入程序proc importdatafilec:\temp\pipefile.txtoutwork.testdbmsdlmreplace;delimiter!;GUESSINGROWS2000;DATAROW2;getnamesyes;run;注意GUESSINGROWS的值阈为1 到 32761.2 对CSV格式的数据进行导入data _null_;file c:\temp\csvfile.csv;putFruit1,Fruit2,Fruit3,Fruit4;putapple,banana,coconut,date;putapricot,berry,crabapple,dewberry;run;导入程序proc importdatafilec:\temp\csvfile.csvoutwork.fruitdbmscsvreplace;run;1.3 对tab分隔数据的导入data _null_;file c:\temp\tabfile.txt;put cereal 09x eggs 09xbacon;put muffin 09x berries09x toast;run;proc importdatafilec:\temp\tabfile.txtoutwork.breakfastdbmstabreplace;getnamesno;run;1.4 对dbf数据库数据进行导入proc importdatafile/myfiles/mydata.dbfoutsasuser.mydatadbmsdbfreplace;run;1.5对excel数据进行导入PROC IMPORT OUT hospital1DATAFILE C:\My Documents\Excel Files\Hospital1.xls DBMSEXCEL REPLACE;SHEETSheet1$;GETNAMESYES;MIXEDNO;SCANTEXTYES;USEDATEYES;SCANTIMEYES;RUN;1.6对access数据进行导入PROC IMPORT DBMSACCESSTABLEcustomers OUTsasuser.cust;DATABASEc:\demo\customers.mdb;UIDbob;PWDcat; WGDBc:\winnt\system32\system.mdb;RUN;proc print datasasuser.cust;run;1.7 import过程步中dbms选项汇总IdentifierInput DataSourceExtensionHost AvailabilityACCESSMicrosoft Access 2000or 2002 table.mdbMicrosoft Windows *ACCESS97Microsoft Access 97 table.mdbMicrosoft Windows *ACCESS2000Microsoft Access 2000 table.mdbMicrosoft Windows *ACCESS2002Microsoft Access 2002 table.mdbMicrosoft Windows *ACCESSCSMicrosoft Access table.mdbUNIXCSVdelimited file(comma-separated values).csvOpenVMS Alpha, UNIX, Microsoft WindowsDBFdBASE 5.0, IV, III,and III files.dbfUNIX, Microsoft WindowsDLMdelimited file (defaultdelimiter is a blank).*OpenVMS Alpha, UNIX, Microsoft WindowsEXCELExcel 2000 or 2002spreadsheet.xlsMicrosoft Windows *EXCEL4Excel 4.0spreadsheet.xlsMicrosoft WindowsEXCEL5Excel 5.0 or 7.0 (95)spreadsheet.xlsMicrosoft WindowsEXCEL97Excel 97 or 7.0 (95)spreadsheet.xlsMicrosoft Windows *EXCEL2000Excel 2000 spreadsheet.xlsMicrosoft Windows *EXCELCSExcel spreadsheet.xlsUNIXJMPJMP table.jmpUNIX, Microsoft WindowsPCFSFiles on PC server.*UNIXTABdelimited file (tab-delimited values).txtOpenVMS Alpha, UNIX, Microsoft WindowsWK1Lotus 1-2-3 Release 2spreadsheet.wk1Microsoft WindowsWK3Lotus 1-2-3 Release 3spreadsheet.wk3Microsoft WindowsWK4Lotus 1-2-3 Release 4or 5 spreadsheet.wk4Microsoft Windows2 导入一个文件夹下的所有文件的数据。2.1下面的代码导入一个文件夹下的所有文件的数据要使用本代码需注意几点首先这个文件夹下的数据文件必须是同一类型分隔的数据比如例子中都是tab分隔的txt文件当然也可以对本代码进行改进例如中间的procimport的dbms改为excel就可以导入excel文件了。其次本代码直接将文件名作为SAS数据集的名字因此文件名必须是英文且满足SAS命名规则。%macro directory(dir);%letrs%sysfunc(filename(filref,dir));%letdid%sysfunc(dopen(filref));%let nobs%sysfunc(dnum(did));%do i1 %to nobs.;%letname%qscan(%qsysfunc(dread(did,i)),1,.);%letext%qscan(%qsysfunc(dread(did,i)),-1,.);proc importoutname.datafiledir.\name..extdbmstab replace;getnamesno;datarow1;run;%end;%let rc%sysfunc(dclose(did));%mend;%directory(dirC:\PRIVATE);如果要将数据集进行汇总到一张表或者则可以直接将proc importoutname中的name改为ai然后对所有的a:数据集进行set操作。除此之外我们还可以对ext进行设置来达到读取指定文件格式的数据。2.2 这里运用pipe读取到文件名称再读取数据。首先建立三个数据集data _null_;file c:\junk\extfile1.txt;put 05JAN2001 6 W12301 1.599.54;put 12JAN2001 3 P01219 2.998.97;run;data _null_;file c:\junk\extfile2.txt;put 02FEB2001 1 P01219 2.992.99;put 05FEB2001 3 A00901 1.995.97;put 07FEB2001 2 C21135 3.006.00;run;data _null_;file c:\junk\extfile3.txt;put 06MAR2001 4 A00101 3.5914.36;put 12MAR2001 2 P01219 2.995.98;run;filename blah pipe dir C:\Junk /b;data _null_;infile blah truncoverendlast;length fname $20;input fname;i1;callsymput(fname||trim(left(put(i,8.))),scan(trim(fname),1,.));callsymput(pext||trim(left(put(i,8.))),trim(fname));if last then callsymput(total,trim(left(put(i,8.))));run;%macro test;%do i1 %tototal;proc importdatafilec:\Junk\pextioutwork.fnameidbmsdlmreplace;delimiter ;getnamesno ;run;proc printdatawork.fnamei;;titlefnamei;run;%end;%mend;%test;这里如果要导入指定文件类型的数据例如txt则只需要将filename blah pipedir C:\Junk /b;改为filename blah pipe dir C:\Junk.*.txt/b;即可。除了用filename blah pipe dir C:\Junk.*.txt/b;得到指定类型的文件名我们还可以%sysexec dir *.xls /b/o:nflist.txt;来将xls文件输出到指定的文件中供读取操作用。这个将在下面的内容作介绍。3 导入excel表中的所有sheet的数据并将其汇总到一个数据表中。3.1 Excel表是sas导入导出最多的数据表之一本例中我们将导入一个excel中的不同的数据%letdirC:\ExcelFiles;%macro ReadXls (inf);libname excellib exceldir.\inf;proc sql noprint;create tablesheetname asselecttranwrd(memname, , ) as sheetnamefromsashelp.vstabvwwherelibnameEXCELLIB;selectcount(DISTINCT sheetname) into :cnt_shtfromsheetname;selectDISTINCT sheetname into :sheet1 -:sheet%left(cnt_sht)fromsheetname;quit;libname excellib clear;%do i1 %to cnt_sht;proc importdatafiledir.\infoutsheeti replace;sheetsheeti;getnamesyes;mixedyes;run;proc append basemasterdatasheeti force;run;%end;%mend ReadXls;%ReadXls(all1.xls);这样我们可以通过%ReadXls(all2.xls);%ReadXls(all3.xls);等来得到多个excel文件的所有数据集。3.2 我们可以结合3.1和2.1或2.2的方法来读取多个文件中的多个表。这里再介绍一种新的读取多个文件的方法options noxwait;%macro ReadXls (dir);%sysexec cd dir; %sysexec dir*.xls /b/o:n flist.txt;data _indexfile;length filen$200;infiledir./flist.txt;input filen$;run;proc sql noprint;selectcount(filen) into :cntfile from _indexfile;%ifcntfile1 %then %do;select fileninto:filen1-:filen%left(cntfile)from_indexfile;%end;quit;%do i1 %to cntfile;libname excellib exceldir.\fileni;proc sql noprint;create tablesheetname asselecttranwrd(memname, , ) as sheetnamefromsashelp.vstabvwwherelibnameEXCELLIB;selectcount(DISTINCT sheetname) into :cnt_shtfromsheetname;selectDISTINCT sheetname into :sheet1 -:sheet%left(cnt_sht)fromsheetname;quit;%do j1 %to cnt_sht;proc importdatafiledir.\filenioutsheetj replace;sheetsheetj;getnamesyes;mixedyes;run;data sheetj;length_excelfilename $100 _sheetname $32;setsheetj;_excelfilenamefilenz;_sheetnamesheetj;run;proc append basemasterdatasheetj force;run;%end;libname excellib clear;%end;%mend ReadXls;%readxls (dirC:\ExcelFiles);4 从多个文件夹下读取多个数据。直接给源代码吧。%macro etl(ds, ds2,path);data ds ds2;LENGTH DateTime 8UserName $ 20Submit $ 10SentNumber $ 11IP $ 15MessageID $ 15SendingMode $ 6Contents $ 160 ;%let filrfmydir;%letrc%sysfunc(filename(filrf,path));%letdid%sysfunc(dopen(filrf));%letmemcount%sysfunc(dnum(did));%do i1 %to memcount;AccountNum1;%let counter AccountNum;%letusernamei%sysfunc(dread(did,i));%let filrefmydir2;%letfile%sysfunc(filename(filref,path\usernamei));%letop%sysfunc(dopen(filref));%letflcount%sysfunc(dnum(op));filename FT77F001D:\SMSGatewayData2\USERS\usernamei\*.log;%do j1 %to flcount;%lettransj%sysfunc(dread(op,j));%putusernamei usernameitransj transjflcount flcountfilref filreffilrf filrf;infile FT77F001 filenamefilename eoveov end done lengthL DSD;INPUT DateTime : ANYDTDTM19.UserName $Submit $SentNumber $IP $MessageID $SendingMode $Contents $;output;%end;%end;run;%mend;%etl(sms2,sms,D:\SMSGatewayData2\USERS)这里就不做测试了原理其实跟3.2差不多就是将目录也参数化。5 参考文献A Case Study of Importing Multiple Worksheet Fileshttp://www2.sas.com/proceedings/sugi31/034-31.pdfReading multiple files with PROC IMPORThttp://support.sas.com/kb/24/707.htmlHow to Invoke PROC IMPORT to read a CSV, TAB or DelimitedFilehttp://ftp.sas.com/techsup/download/sample/datastep/import.htmlUse PROC IMPORT to read a CSV, TAB or delimited file.sasReading multiple files with PROC IMPORT.sasdirectory_import_dbms.sasproc_import_dbf.htmlhttp://ftp.sas.com/