/*
 * Demonstrates working with serial (time-ordered) data using PROC TRANSPOSE,
 * arrays, DO loops and the DIM function, plus some plotting and several
 * ways of flagging / recoding values as missing.
 *
 * Data are borrowed from Professor Lardaro's RI economic indicators web site
 * http://members.cox.net/lardaro/current.htm
 *
 * The worksheet is imported without column names, so the script addresses the
 * columns as F1-F13. F1 is copied into YEAR and F2-F13 into January-December
 * by the last step of this program.
 */

/* Calendar year used to label the first twelve stacked observations.
   NOTE(review): assumes the first imported row is the 1983 row, as the
   original hard-coded labels did -- confirm against the spreadsheet. */
%let first_year = 1983;

libname lab 'C:\1datalap\learning\psy532x';

proc import out=x
        datafile="C:\1dataLap\personal\econ_ind.xls"
        dbms=excel replace;
    getnames=no;
    mixed=no;
    scantext=yes;
    usedate=yes;
    scantime=yes;
run;

proc print data=x;
run;

/* Flip the data (rows become columns). */
proc transpose data=x out=vert;
run;

proc print data=vert;
run;

/* Make one vertical column of all data ("stacking"): each input row is
   written out twelve times, once per F2-F13 column, with that column's
   value in econ1 and its position in k. Values of 58 become missing. */
data x1;
    set x;
    array econ(1:12) F2-F13;
    do k = 1 to 12;
        if econ(k) = 58 then econ(k) = .;
        econ1 = econ{k};
        output;
    end;
    drop F2-F13;
run;

proc print data=x1;
run;

/* Add a running time index and a character year label.
   Every block of twelve consecutive observations gets the next year:
   time 1-12 -> first_year, 13-24 -> first_year + 1, and so on.
   (The earlier hand-written ranges labelled 13-26 as one year and left all
   later observations blank.) */
data lab.serial_ex;
    set x1;
    time = _N_;
    length year $4;
    year = put(&first_year + floor((time - 1) / 12), 4.);
run;

proc print data=lab.serial_ex;
    var time k year;
run;

/* Now try some descriptives and plotting. */
proc capability data=lab.serial_ex;
    var econ1;
    histogram econ1;
run;

proc gplot data=lab.serial_ex;
    symbol i=spline v=dot h=2;
    plot econ1 * time;
run;
quit;   /* GPLOT is a run-group procedure; end it explicitly. */

/* What the heck, try some time series -- assuming the economy has no
   seasonality and does not need to be differenced; in other words, rather
   unthoughtfully. Note that p=1 q=1 fits an ARMA(1,1) model; drop q=1 for a
   pure AR(1). */
proc arima data=lab.serial_ex;
    identify var=econ1;
    estimate p=1 q=1;
run;
quit;   /* ARIMA is interactive; end it explicitly. */

/* Assignment option 1: go to
   http://www.technion.ac.il/docs/sas/ets/chap2/sect27.htm
   and try some other time series plots. Due in two weeks. */

/* Set up flag variables for missingness: a value of 50 or 83 in F2, F3 or F4
   is treated as "missing". Where_miss packs the three flags into one number
   (hundreds digit = F4, tens digit = F3, units digit = F2). */
data missing_ex1;
    set x;
    miss_flag1 = 0;
    if F2 in (50, 83) then miss_flag1 = 1;
    miss_flag2 = 0;
    if F3 in (50, 83) then miss_flag2 = 1;
    miss_flag3 = 0;
    if F4 in (50, 83) then miss_flag3 = 1;
    Where_miss = (100 * miss_flag3) + (10 * miss_flag2) + (miss_flag1);
run;

proc print data=missing_ex1;
    var Where_miss miss_flag1 miss_flag2 miss_flag3;
run;

/* Assignment option 2: create a group variable and then compare missingness
   for two groups (add a BY group statement below; it is left out here). */
proc freq data=missing_ex1;
    tables Where_miss;
run;

/* Stack the data into one vertical column again, this time treating
   58, 83 and 67 as missing. F2-F13 are kept on every stacked row. */
data missing_ex2;
    set x;
    array econ(1:12) F2-F13;
    do k = 1 to 12;
        if econ(k) in (58, 83, 67) then econ(k) = .;
        econ1 = econ{k};
        output;
    end;
    drop k;
run;

data missing_ex2a;
    set missing_ex2;
    time = _N_;
run;

proc print data=missing_ex2a;
run;

proc gplot data=missing_ex2a;
    symbol i=spline v=circle h=2;
    plot econ1 * time;
run;
quit;

/* Same ARMA(1,1) specification as above, now on the recoded series. */
proc arima data=missing_ex2a;
    identify var=econ1;
    estimate p=1 q=1;
run;
quit;

/* Using arrays to recode all the variables for given criteria:
   recode all values of 58 ACROSS VARIABLES to missing, then stack.
   The recode is finished for all twelve columns before any row is written,
   so every stacked row carries the same, fully recoded F2-F13. (Interleaving
   recode and OUTPUT left row k recoded only up to column k, which made the
   PROC MEANS counts below inconsistent between F2 and F13.)
   Each input row still appears twelve times, so N and NMISS for F2-F13 are
   twelve times the per-row counts; econ1 is the stacked series. */
data missing_ex3;
    set x;
    array econ(1:12) F2-F13;
    do k = 1 to 12;
        if econ(k) = 58 then econ(k) = .;
    end;
    do k = 1 to 12;
        econ1 = econ{k};
        output;
    end;
    drop k;
run;

proc means n nmiss data=missing_ex3;
run;

proc print data=missing_ex3;
    var econ1;
run;

/* Recode a value (67) to missing across ALL numeric variables using
   _NUMERIC_ and DIM, so the array size never has to be hard-coded.
   Breed is a character example: it is blanked when F2 equals 100.
   The array is declared before the loop counter exists, so the counter is
   not a member of the array; it is dropped to keep it out of the output
   dataset and the PROC MEANS listing. */
data missing_ex4;
    set x;
    Breed = 'doberman';
    if F2 = 100 then breed = ' ';
    array nvar(*) _numeric_;
    do i = 1 to dim(nvar);
        if nvar(i) = 67 then nvar(i) = .;
    end;
    drop i;
run;

proc print data=missing_ex4;
    /* var F13; */
run;

proc means n nmiss data=missing_ex4;
run;

/* Assignment option 3: use the above code and other tricks that you have
   learned to calculate the overall percent missingness in the database. */

/* Assignment option 4: create some additional time series type plots using
   any of the above databases. Optionally, experiment with adding economic
   indicators and reviewing correlations or cross correlations in the TS
   framework; Lardaro's web page has several options. */

/* Give the imported columns readable names.
   Note: this replaces the stacked WORK.X1 created near the top of the
   program; lab.serial_ex has already been built from it by this point. */
data x1;
    set x;
    January   = F2;
    February  = F3;
    March     = F4;
    April     = F5;
    May       = F6;
    June      = F7;
    July      = F8;
    August    = F9;
    September = F10;
    October   = F11;
    November  = F12;
    December  = F13;
    YEAR      = F1;
    drop F1-F13;
run;