* (1) USING SAS FUNCTIONS;
* Example: MDY builds a SAS date value from month, day, and year arguments;
BIRTHDAY = MDY(MonthBorn, DayBorn, YearBorn);
* Functions can be nested: the inner call is evaluated first and its result is passed to the outer function;
newvalue = INT(LOG(10));
* LOG is the natural logarithm, so LOG(10) is about 2.3026 and INT truncates it to 2;
* (2) USING IF-THEN STATEMENTS;
IF condition THEN action;
* Note that a single IF-THEN statement can execute only one action. Wrap several actions in a DO-END group to run them all;
IF condition THEN DO;
action1;
action2;
END;
* ELSE chains mutually exclusive conditions, which is useful for grouping observations into categories;
IF condition1 THEN DO;
action1;
action2;
END;
ELSE IF condition2 THEN action3;
ELSE action4;
* Note that a numeric missing value compares as smaller than any number, so a condition such as x < 10 is also true when x is missing;
* (3) SUBSETTING DATA;
* A subsetting IF keeps only the observations for which the condition is true, while DELETE drops the observations for which its condition is true;
IF SEX = 'F';
IF SEX = 'M' THEN DELETE; *These two statements give the same result only when SEX takes no values other than F and M;
* (4)INFORMATS AND FORMATS;
* A SAS date is a numeric value equal to the number of days since January 1, 1960;
* Suppose that you have a date variable named BIRTHDAY whose raw values look like 1995/03/01;
* Read BIRTHDAY from a comma-delimited file and store it as a SAS date value;
DATA TEST1;
    INFILE 'TEST1.csv' DLM = ',';
    * The colon modifier makes the YYMMDD10. informat stop at the next delimiter instead of always consuming exactly 10 columns;
    INPUT BIRTHDAY : YYMMDD10.;
RUN;
* In some cases SAS sees a two-digit year such as 95/03/01 and has to decide which century the year belongs to;
OPTIONS YEARCUTOFF = 1950; *This option tells SAS to interpret two-digit years as falling in the 100-year span from 1950 to 2049;
* A SAS date is a number of days, so it can be used directly in calculations. The statement below adds 21 days to Checkdate;
Duedate = Checkdate + 21;
* You can use a date as a constant in a SAS expression by writing it in quotes followed by the letter D;
Earthday14 = '22APR2014'D;* This creates a variable named Earthday14 whose value is the SAS date for 2014/04/22;
* Functions: SAS has many date functions;
CurrentAge = INT(YRDIF(Birthdate, TODAY(),'AGE'));* This computes the current age in whole years from Birthdate and the current date;
* Use a FORMAT statement in a PROC step to control how values are displayed;
* Print TEST1 with display formats attached for this report;
PROC PRINT DATA = TEST1;
    TITLE 'SAS Date value with formats';
    * One FORMAT statement per variable: MMDDYY8. for BIRTHDAY and WEEKDATE17. for Duedate;
    FORMAT BIRTHDAY MMDDYY8.;
    FORMAT Duedate WEEKDATE17.;
RUN;
* (5)RETAIN AND SUM STATEMENT;
* By default, SAS resets variables created by INPUT or assignment statements to missing at the start of each iteration of the DATA step;
* If a variable appears in a RETAIN statement, its value is carried over from one iteration of the DATA step to the next;
* The sum statement also retains the previous value, then adds the value of the expression to it, producing a running total across iterations;
RETAIN var-list initial-value; *You can also specify an initial value to use instead of the default missing value;
VARIABLE + EXPRESSION;
* Track the running maximum and the running total of VAR1 across observations;
data TEST1;
    infile 'FILE.dat';
    input VAR1;
    * MAXS is retained so each observation is compared with the largest value seen so far;
    retain MAXS;
    MAXS = max(of MAXS VAR1);
    * Sum statement: SUMVAR1 starts at 0, is retained automatically, and ignores missing VAR1 values;
    SUMVAR1 + VAR1;
run;
* (6)Using ARRAY TO SIMPLIFY YOUR PROGRAMS;
ARRAY array_name {n} $ var-list;*An array exists only for the duration of the DATA step that defines it. Include $ only when the elements are character variables;
* example;
* Recode the value 9 to missing in the five variables grouped by the song array;
DATA SONGS;
    INFILE 'C:\MYRAWDATA\KBRK.dat';
    INPUT CITY $ 1-15 AGE WJ KT TR FLIP TRR;
    * Group the five variables under one array name so they can be processed by index;
    ARRAY song {5} WJ KT TR FLIP TRR;
    * Visit every array element and replace the code 9 with a numeric missing value;
    * The index variable i is also written to SONGS because it is not dropped;
    DO i = 1 TO 5;
        IF song{i} = 9 THEN song{i} = .;
    END;
RUN;
* (7) USING ABBREVIATED VARIABLE LISTS;
* Numbered range list: the two INPUT statements below name the same five variables;
INPUT CAT1 CAT2 CAT3 CAT4 CAT5;
INPUT CAT1 - CAT5;
* Name range list: you need to know the internal order of the variables. PROC CONTENTS with the POSITION option shows it;
PUT Y C S H B;
PUT Y -- B;
* Name prefix list: OF DOG: refers to every variable whose name starts with DOG;
* NOTE(review): DOGBILLS also starts with DOG, so the prefix list may pick it up if it is already defined at that point - TODO confirm;
DOGBILLS = SUM(DOGVET,DOGFOOD,DOGCARE);
DOGBILLS = SUM(OF DOG:);
* Special SAS name lists such as _NUMERIC_ and _ALL_;
* NOTE(review): the MEAN call below is shown as a bare expression. In a real DATA step its result would need to be assigned to a variable;
MEAN(OF _NUMERIC_);
PUT _ALL_;