/*SAS Code for manual*/

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*To The Student - How to Use SAS With This Book*/
/* Four equivalent ways to write the same DATA step. SAS keywords are not
   case-sensitive and a statement ends at its semicolon, not at the line
   break, so capitalisation and layout are a matter of taste. Every variant
   creates WORK.DATASET with one numeric variable, Score, and four rows
   (1, 2, 3, 4). Each variant overwrites the data set made by the previous one. */

/* Variant 1: lower-case keywords, one statement per line. */
data work.dataset;
	input Score;
	datalines;
1
2
3
4
;
run;

/* Variant 2: identical step written with upper-case keywords. */
DATA work.dataset;
	INPUT Score;
	DATALINES;
1
2
3
4
;
RUN;

/* Variant 3: several statements on one line. */
DATA work.dataset; INPUT Score; DATALINES;
1
2
3
4
;
RUN;

/* Variant 4: all values on a single data line. The double trailing @@ holds
   the line across iterations of the DATA step; without it INPUT would read
   only the first value (1) and discard the rest of the line. */
DATA work.dataset; INPUT Score @@; DATALINES;
1 2 3 4
;
RUN;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*1.7 Creating data files and defining variables*/

/*Approach A*/
/* Approach A: type the values straight into the program.
   Builds MYFIRSTDATA with three numeric columns (one row per data line)
   and lists it. */
data myfirstdata;
	input Class_1-Class_3;
	datalines;
3.3 3.9 2.7
2.9 4.0 2.3
3.5 2.4 2.2
3.6 3.1 3.0
3.1 3.0 2.8
;
run;

proc print data=myfirstdata;
run;

/*Make a new data table with a character column*/
/* Same three numeric columns plus Class_4, which is read as character
   (the $ after the name) so that letter grades can be stored. */
data myseconddata;
	input Class_1-Class_3 Class_4 $;
	datalines;
3.3 3.9 2.7 A
2.9 4.0 2.3 C
3.5 2.4 2.2 B-
3.6 3.1 3.0 A
3.1 3.0 2.8 B+
;
run;

proc print data=myseconddata;
run;

/*Approach B*/
/* Folder that holds the practice CSV files; later steps reuse &path. */
%let path=/folders/myfolders/APracticeFolder;

/* Read three numeric columns from a comma-separated file.
   DSD      - treat two consecutive commas as an empty (missing) field and
              strip quotes around values, instead of collapsing the commas
              and shifting every later value one column to the left.
   TRUNCOVER - if a record has fewer than three fields, set the remaining
              variables to missing rather than continuing onto the next record.
   NOTE(review): if GPA_Scores.csv starts with a header row, add FIRSTOBS=2
   to the INFILE statement - the file layout is not visible from here. */
data work.GPA;
	infile "&path/GPA_Scores.csv" dsd dlm=',' truncover;
	input Class1 Class2 Class3;
run;

/* Describe the variables (names, types, lengths) of the imported table. */
proc contents data=work.GPA;
run;

/* List the imported rows. */
proc print data=work.GPA;
run;

/*Example 3*/
/* Read a comma-separated file with mixed column types; a $ after a name
   marks that variable as character. Relies on the macro variable &path
   having been defined earlier in the session.
   DSD keeps empty fields (two commas in a row) as missing values instead of
   shifting later columns, and TRUNCOVER stops a short record from pulling
   values out of the following record.
   NOTE(review): add FIRSTOBS=2 if Practice3.csv has a header row - the file
   itself is not visible from here. */
data work.Example3;
	infile "&path/Practice3.csv" dsd dlm=',' truncover;
	input ID $ Age SAT Gender $ Grade $ Exam;
run;

/* List the imported rows. */
proc print data=work.Example3;
run;

/* Describe the variables of the imported table. */
proc contents data=work.Example3;
run;

/* Character format that spells out the one-letter gender codes. */
proc format;
	value $Gender
		"F" = "Female"
		"M" = "Male";
run;

/* Two-line report heading; TITLE is the same statement as TITLE1. */
title "Class Information";
title2 "Created Mar 22 2016";

/* List the class data, displaying Gender through the format above.
   The stored values are not changed - only how they are printed. */
proc print data=work.Example3;
	format Gender $Gender.;
run;

/* Keep only the male students aged 18 or over, add a curved exam score
   (Exam plus 5 points) and leave Age out of the new table. */
data work.Subset;
	set work.Example3;
	where Age>=18 and Gender="M";
	Curve=Exam+5;
	drop Age;
run;

proc print data=work.Subset;
run;

/* TITLE statements are global and stay in effect until replaced or cleared.
   Clear them here so the class headings do not appear on the unrelated
   reports produced by later sections. */
title;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*2.4 Frequency Distributions for Quantitative Data*/

/* Enter the complaint counts one value per data line: each line becomes
   one observation of the numeric variable Complaints. */
data complaints;
	input Complaints;
	datalines;
45
66
88
92
101
55
78
86
91
94
98
66
55
110
85
95
66
92
77
80
83
88
76
79
90
91
73
51
88
102
50
95
115
105
92
92
58
63
86
107
86
73
66
101
81
;
run;

proc print data=complaints;
run;

/* Same complaint counts entered on a single data line. The double trailing
   @@ holds the line so that every value becomes its own observation. */
data freqcomplaints;
	input Complaints @@;
	datalines;
45 66 88 92 101 55 78 86 91 94 98 66 55 110 85 95 66 92 77 80 83 88 76 79 90 91 73 51 88 102 50 95 115 105 92 92 58 63 86 107 86 73 66 101 81
;
run;

proc print data=freqcomplaints;
run;

/* Frequency table of the complaint counts. */
proc freq data=freqcomplaints;
	tables Complaints;
	title 'Complaints Frequencies';
run;

/* Titles are global: clear this one so it does not head later reports. */
title;

/*2.7 Frequency Distributions for Categorical Data*/

/* Category counts entered as (category, total) pairs on one data line.
   LENGTH is declared first because a character variable read with plain
   list input defaults to 8 bytes, which would store "overweight" as
   "overweig". */
data work.BMI;
	length BMI $ 10;
	input BMI $ Total @@;
	datalines;
lean 15 healthy 30 overweight 35 obese 20
;
run;

proc print data=work.BMI;
run;

/* Frequency table of the categories; WEIGHT makes each row count Total
   times instead of once. */
proc freq data=work.BMI;
	tables BMI;
	weight Total;
	title 'BMI Frequencies';
run;

/* Titles are global: clear this one so it does not head later reports. */
title;

/*2.7 Frequency Distributions for Categorical Data*/
/*This is the same code as the previous set, except we add A, B, C, and D to our categories*/
/* Same counts with the category names prefixed A-D; the frequency table
   lists categories in sorted order, so the prefixes control that order.
   LENGTH is declared first because plain list input stores at most 8
   characters by default, which would store "COverweight" as "COverwei". */
data work.BMI2;
	length BMI $ 11;
	input BMI $ Total @@;
	datalines;
ALean 15 BHealthy 30 COverweight 35 DObese 20
;
run;

proc print data=work.BMI2;
run;

/* Frequency table; WEIGHT makes each row count Total times. */
proc freq data=work.BMI2;
	tables BMI;
	weight Total;
	title 'BMI Frequencies';
run;

/* Titles are global: clear this one so it does not head later reports. */
title;

/*2.12 Histograms, Bar Charts, and Pie Charts*/

/* Twenty scores, read several per line with the double trailing @@. */
data work.numbers;
	input Numbers @@;
	datalines;
1 2 3 2 4 4 3 6 6 5
5 6 7 5 8 7 8 9 4 5
;
run;

proc print data=work.numbers;
run;

/* Descriptive statistics plus a histogram of the only numeric variable. */
proc univariate data=work.numbers;
	var Numbers;
	histogram Numbers;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*3.6 Mean, Median, and Mode*/

/* Twenty scores for the variable Clips, read several per line with @@. */
data work.clips;
	input Clips @@;
	datalines;
41 65 123 46 48 87 38 90 132 115
80 80 64 59 51 36 80 143 122 100
;
run;

proc print data=work.clips;
run;

/* PROC UNIVARIATE reports the mean, median and mode, among other statistics. */
proc univariate data=work.clips;
	var Clips;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*4.11 Range, Variance, and Standard Deviation*/
/*You can use the same code as in section 3.6 to get the range, variance, and sd*/

/* Re-create the Clips data so this section can be run on its own. */
data work.clips;
	input Clips @@;
	datalines;
41 65 123 46 48
87 38 90 132 115
80 80 64 59 51
36 80 143 122 100
;
run;

proc print data=work.clips;
run;

/* The same PROC UNIVARIATE output also lists the range, variance and
   standard deviation. */
proc univariate data=work.clips;
	var Clips;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*5.6 Probability Tables*/

/* Cell counts for a 2 x 2 table: hospital type by insurance status.
   LENGTH is declared first because plain list input stores at most 8
   characters by default, which would store "Uninsured" as "Uninsure". */
data work.Hospital;
	length Hospital $ 7 Insurance $ 9;
	input Hospital $ Insurance $ Count;
	datalines;
Private Uninsured  30
Private Insured  40
Public Uninsured  80
Public Insured 50
;
run;

/* Cross-tabulation; WEIGHT makes each row stand for Count observations. */
proc freq data=work.Hospital;
	tables Hospital*Insurance;
	weight Count;
	title 'Birth Frequency';
run;

/* Titles are global: clear this one so it does not head later reports. */
title;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*6.8 Converting Raw Scores to Standard z Scores*/

/* Sixteen raw SAT scores. */
data work.SAT;
	input SATScore @@;
	datalines;
500 750 600 900 950 880 990 560
780 800 800 450 800 680 550 600
;
run;

proc print data=work.SAT;
run;

/* Rescale the scores to mean 0 and standard deviation 1 (z scores) and
   write the result to ZSAT; the input table is left unchanged. */
proc standard data=work.SAT out=zSAT mean=0 std=1;
	var SATScore;
run;

proc print data=zSAT;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*7.7 Estimating the Standard Error of the Mean*/

/* Ten scores for the variable Reaction. */
data work.ecall;
	input Reaction @@;
	datalines;
93 66 30 44 20
100 35 58 70 81
;
run;

proc print data=work.ecall;
run;

/* The PROC UNIVARIATE output includes the standard error of the mean. */
proc univariate data=work.ecall;
	var Reaction;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*9.6 One Sample t Test*/

/* Eighteen scores for the one-sample test. */
data work.OCD;
	input Score @@;
	datalines;
20 60 48 92 50 82 48 90 30
68 43 54 60 62 94 67 63 85
;
run;

proc print data=work.OCD;
run;

/* One-sample t test of Score against the null-hypothesis mean 77.43,
   with alpha set to 0.05. */
proc ttest data=work.OCD alpha=0.05 h0=77.43;
	var Score;
run;

/*9.6 Two Independent Sample t Test*/

/* (Speed, Calories) pairs: six rows for the Slow group and six for the
   Fast group. */
data Calories;
	input Speed $ Calories @@;
	datalines;
Slow 700 Slow 450 Slow 850
Slow 600 Slow 450 Slow 550
Fast 450 Fast 800 Fast 750
Fast 700 Fast 550 Fast 650
;
run;

proc print data=work.Calories;
run;

/* Two-independent-sample t test: CLASS names the grouping variable and
   VAR the variable whose group means are compared. */
proc ttest data=work.Calories;
	var Calories;
	class Speed;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*10.6 The Related Samples t Test*/

/* Eight (Present, Absent) pairs, one pair per observation. */
data work.Supervision;
	input Present Absent @@;
	datalines;
220 210 245 220 215 195 260 265
300 275 280 290 250 220 310 285
;
run;

proc print data=work.Supervision;
run;

/* Related-samples t test on the paired difference Present - Absent. */
proc ttest data=work.Supervision;
	paired Present*Absent;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*11.5 Confidence Intervals for the One-Sample t Test*/

/* Re-create the eighteen scores so this section can be run on its own. */
data work.OCD;
	input Score @@;
	datalines;
20 60 48 92 50 82
48 90 30 68 43 54
60 62 94 67 63 85
;
run;

proc print data=work.OCD;
run;

/* Same one-sample t test as before, now with alpha=0.1, which sets the
   confidence limits in the output to the 90% level. */
proc ttest data=work.OCD alpha=0.1 h0=77.43;
	var Score;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*11.7 Confidence Intervals for the Two-Independent-Sample t Test*/

/* Re-create the (Speed, Calories) pairs so this section can be run on
   its own. */
data Calories;
	input Speed $ Calories @@;
	datalines;
Slow 700 Slow 450 Slow 850 Slow 600
Slow 450 Slow 550 Fast 450 Fast 800
Fast 750 Fast 700 Fast 550 Fast 650
;
run;

proc print data=work.Calories;
run;

/* The two-sample PROC TTEST output includes confidence limits for the
   difference between the group means. */
proc ttest data=work.Calories;
	var Calories;
	class Speed;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*11.9 Confidence Intervals for the Related-Samples t Test*/

/* Re-create the eight (Present, Absent) pairs so this section can be run
   on its own. */
data work.Supervision;
	input Present Absent @@;
	datalines;
220 210 245 220
215 195 260 265
300 275 280 290
250 220 310 285
;
run;

proc print data=work.Supervision;
run;

/* The paired PROC TTEST output includes confidence limits for the mean
   difference. */
proc ttest data=work.Supervision;
	paired Present*Absent;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*12.8 The One-Way Between-Subjects ANOVA*/

/* (Stress, Score) pairs: ten observations in each of the three groups
   Low, Moderate and High. */
data work.Stress;
	input Stress $ Score @@;
	datalines;
Low 3.4 Low 3.2 Low 3.0 Low 3.0 Low 3.5 Low 3.8 Low 3.6 Low 4.0 Low 3.9 Low 2.9
Moderate 3.5 Moderate 3.6 Moderate 2.7 Moderate 3.5 Moderate 3.8 Moderate 2.9
Moderate 3.4 Moderate 3.2 Moderate 3.3 Moderate 3.1
High 2.9 High 3.0 High 2.6 High 3.3 High 3.7 High 2.7 High 2.4 High 2.5 High 3.3 High 3.4
;
run;

proc print data=work.Stress;
run;

/* One-way between-subjects ANOVA of Score across the Stress groups.
   PROC ANOVA is an interactive procedure: RUN executes the statements but
   the procedure stays active until QUIT (or the next step), so QUIT is
   issued explicitly to end it. */
proc anova data=work.Stress;
	class Stress;
	model Score = Stress;
run;
quit;

/* Same ANOVA, followed by Tukey pairwise comparisons of the group means. */
proc anova data=work.Stress;
	class Stress;
	model Score = Stress;
	means Stress / tukey;
run;
quit;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*13.6 The One-Way Within-Subjects ANOVA*/

/* One row per subject: the subject number followed by that subject's
   score on each of the three measures (Cues, Generic, Smoking). */
data work.Adver;
	input Subject Cues Generic Smoking;
	datalines;
1 2 5 5
2 3 5 6
3 1 4 5
4 4 5 7
5 4 3 6
6 5 4 7
7 2 2 6
;
run;

/* One-way within-subjects ANOVA: the three measures are the dependent
   variables, NOUNI suppresses the separate univariate analyses, and
   REPEATED defines a three-level within-subjects factor named Time. */
proc anova data=work.Adver;
	model Cues Generic Smoking = / nouni;
	repeated Time 3 (1 2 3);
run;

/* Paired t tests for each of the three pairs of measures. */
proc ttest data=work.Adver;
	paired Cues*Generic
	       Cues*Smoking
	       Generic*Smoking;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*14.8 The Two-Way Between-Subjects ANOVA*/

/* (Buffet, Exposure, Times) triples: six observations in each of the
   2 x 3 Buffet-by-Exposure cells. */
data work.Sugars;
	input Buffet $ Exposure $ Times @@;
	datalines;
Absent Low 8 Absent Low 7 Absent Low 9 Absent Low 10 Absent Low 12 Absent Low 8
Absent Mod 10 Absent Mod 12 Absent Mod 15 Absent Mod 8 Absent Mod 6 Absent Mod 9
Absent High 13 Absent High 9 Absent High 11 Absent High 8 Absent High 13 Absent High 12
Present Low 5 Present Low 8 Present Low 5 Present Low 6 Present Low 5 Present Low 7
Present Mod 15 Present Mod 10 Present Mod 8 Present Mod 9 Present Mod 7 Present Mod 11
Present High 15 Present High 12 Present High 15 Present High 16 Present High 12 Present High 14
;
run;

proc print data=work.Sugars;
run;

/* Two-way between-subjects ANOVA with both main effects and their
   interaction. The second MEANS statement repeats the main effects and
   adds the Buffet*Exposure cell means.
   PROC ANOVA is interactive and stays active after RUN, so QUIT ends it. */
proc anova data=work.Sugars;
	class Buffet Exposure;
	model Times = Buffet Exposure Buffet*Exposure;
	means Buffet Exposure / tukey;
	means Buffet Exposure Buffet*Exposure / tukey;
run;
quit;

/* The same model fitted with PROC GLM, which is also interactive and is
   therefore ended with QUIT as well. */
proc glm data=work.Sugars;
	class Buffet Exposure;
	model Times = Buffet Exposure Buffet*Exposure;
	means Buffet Exposure / tukey;
	means Buffet Exposure Buffet*Exposure / tukey;
run;
quit;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*15.4 Pearson Correlation Coefficient*/

/* Eight (Mood, Eating) pairs. */
data work.Mood;
	input Mood Eating @@;
	datalines;
6 480 4 490 7 500 4 590
2 600 5 400 3 545 1 650
;
run;

/* Pearson correlation between the two variables (the PROC CORR default). */
proc corr data=work.Mood;
	var Mood Eating;
run;

/*15.8 Spearman Correlation Coefficient*/

/* Eight (Food, Water) pairs of ranks. */
data work.Ranks;
	input Food Water @@;
	datalines;
1 1 1 3 3 2 4 6
5 4 6 7 7 8 8 5
;
run;

/* The SPEARMAN option requests the rank-order correlation. */
proc corr data=work.Ranks spearman;
	var Food Water;
run;

/*15.10 Point-Biserial Correlation Coefficient*/

/* Twelve (Sex, Laughter) pairs: Sex is a two-valued code (1 or 2) and
   Laughter is the continuous score. */
data work.Comedy;
	input Sex Laughter @@;
	datalines;
1 23 1 9 1 12 1 12 1 29
2 32 2 10 2 8 2 20 2 12 2 24 2 34
;
run;

 /* Define the BISERIAL macro */
%let path=/folders/myfolders; 

/* BISERIAL macro: computes the point-biserial, biserial and rank-biserial
   correlations between a continuous and a binary variable.

   The definition is placed BEFORE the call below because a macro must be
   compiled before it can be invoked; calling it first fails on a fresh
   session with an unresolved-macro error.

   Parameters
     version  positional; any non-blank value prints the macro version
     data=    input data set
     contin=  name of the continuous variable
     binary=  name of the binary variable
     out=     output data set; receives one row holding biserial, pntbisrl
              and rnkbisrl

   Side effects: creates and overwrites the data set _TEMP_, rewrites &out
   several times as a work table, and switches OPTIONS NONOTES on at the
   start and OPTIONS NOTES back on near the end (NOTES is left on whatever
   it was before the call). */
%macro biserial(version, data= ,contin= ,binary= ,out=);

%if &version ne %then %put BISERIAL macro Version 2.2;

options nonotes;
* exclude observations with missing variables *;
data &out;
 set &data;
 where &contin>.;
 if &binary>.;
 run;

* compute the ranks for the continuous variable *;
proc rank data=&out out=&out ;
 var &contin;
 ranks r_contin;
 run;

* compute proportion of binary, std of contin, and n *;
proc means data=&out noprint;
 var &binary &contin;
 output out=_temp_(keep=p stdy n) mean=p std=stdx stdy n=n;
 run;

* sort by the binary variable *;
proc sort data=&out;
 by descending &binary;
 run;

* compute mean of contin and rank of contin var *;
proc means data=&out noprint;
 by notsorted &binary;
 var &contin r_contin;
 output out=&out mean=my r_contin;
 run;

* restructure the means computed in the step above *;
proc transpose data=&out out=&out(rename=(col1=my1 col2=my0));
 var r_contin my;
 run;

* combine the data needed to compute biserial correlation *;
data &out;
 set &out(drop= _name_ _label_);
 retain r1 r0 ;
 if _n_=1 then do;
  r1=my1;
  r0=my0;
 end;
 else do;
  set _temp_;
  output;
 end;
 run;

* compute point biserial correlation *;
proc corr data=&data  noprint outp=_temp_;
 var &binary &contin;
 run;

/* NOTE(review): in a DATA step IF statement the <> operator means MAX, not
   "not equal", so the test below lets every _TYPE_='CORR' row through. The
   row holding the self-correlation is removed later by the
   "if pntbisrl=1 then delete" statement in the final step. Do not replace
   <> with NE: that would keep only the self-correlation row, which the
   final step then deletes, leaving no result. Confirm against the macro's
   published source before changing it. */
* extract the point biserial correlation from the matrix *;
data _temp_(keep=pntbisrl);
 set _temp_(rename=(&contin=pntbisrl));
 if _TYPE_='CORR' and &binary<>1 then output;

 run;

options notes;
* compute biserial and rank biserial *;
data &out;
 merge _temp_  &out;
 if pntbisrl=1 then delete;
 h=probit(1-p);
 u=exp(-h*h/2)/sqrt(2*arcos(-1));
 biserial=p*(1-p)*(my1-my0)/stdy/u;
 rnkbisrl=2*(r1-r0)/n;

 keep biserial pntbisrl rnkbisrl;
 label biserial='Biserial Corr'
       pntbisrl='Point Biserial Corr'
       rnkbisrl='Rank Biserial Corr';
 run;

%mend biserial;

/* Run the macro on the Comedy data (created in the preceding DATA step):
   Laughter is the continuous variable, Sex the binary one. */
%biserial(data=work.Comedy, contin=Laughter, binary=Sex, out=out1);

/* Print the three correlations using their variable labels as headings. */
proc print data=out1 label noobs;
	title 'Point Biserial, Biserial and Rank Biserial Correlations';
run;

/* Titles are global: clear this one so it does not head later reports. */
title;

/*Phi Correlation Coefficient: the Pearson correlation (see 15.4) applied to two dichotomous variables*/

/* Forty (Emp, Happy) pairs; both variables take only the values 0 and 1. */
data work.Emp;
	input Emp Happy @@;
	datalines;
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 1 0 1 0 1 0 1 0 1 0 1 1 0 1 0 1 0 1 0 1 0 1 0
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
;
run;

/* Pearson correlation between the two 0/1 variables. */
proc corr data=work.Emp;
	var Emp Happy;
run;
       
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*16.7 Analysis of Regression*/ 

/* Eight (Sessions, Symptoms) pairs. */
data work.Symp;
	input Sessions Symptoms @@;
	datalines;
9 0 5 3 8 2 2 5 6 3 3 4 5 2 4 3
;
run;

/* Simple linear regression of Symptoms on Sessions.
   PROC REG is an interactive procedure: RUN fits the model but the
   procedure stays active until QUIT (or the next step), so QUIT is issued
   explicitly to end it. */
proc reg data=work.Symp;
	model Symptoms=Sessions;
run;
quit;
                
/*16.13 Multiple Regression Analysis*/ 

/* Six (Age, Education, Sales) triples. */
data work.Sales;
	input Age Education Sales @@;
	datalines;
19 12 20 21 14 40 26 13 30 28 18 68 32 17 70 30 16 60
;
run;

/* Multiple regression of Sales on Age and Education.
   PROC REG is interactive and stays active after RUN, so each call is
   ended explicitly with QUIT. */
proc reg data=work.Sales;
	model Sales=Age Education;
run;
quit;

/*Standardized beta coefficients*/
/* The STB option adds the standardized estimates to the parameter table. */
proc reg data=work.Sales;
	model Sales=Age Education / stb;
run;
quit;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*17.3 The Chi-Square Goodness-of-Fit Test*/

/* Observed count for each of the three response categories. */
data work.Recall;
	input Recall $ Count;
	datalines;
Did 58
DidNot 12
Unsure 10
;
run;

/* Chi-square goodness-of-fit test. WEIGHT makes each row stand for Count
   observations; TESTP gives the expected percentages, matched to the
   categories in the order they appear in the table (Did, DidNot, Unsure). */
proc freq data=work.Recall;
	weight Count;
	tables Recall / chisq testp=(80 10 10);
run;
             
/*17.9 The Chi-Square Test for Independence*/

/* Cell counts for a 2 x 2 table: type of counselling by outcome.
   LENGTH is declared first because plain list input stores at most 8
   characters by default, which would store "Termination" as "Terminat". */
data work.Counsel;
	length Counsel $ 6 Completion $ 11;
	input Counsel $ Completion $ Count;
	datalines;
Family Complete 22
Family Termination 12
Ind Complete 31
Ind Termination 45
;
run;

/* Chi-square test for independence; WEIGHT makes each row stand for
   Count observations. */
proc freq data=work.Counsel;
	tables Counsel*Completion / chisq;
	weight Count;
run;

/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*####################*/
/*18.3 The Related-Samples Sign Test*/

/* Eleven (Sub, Full) pairs. */
data work.Outburst;
	input Sub Full @@;
	datalines;
3 2 2 0 5 4 3 3 4 2 2 0
0 2 3 1 1 0 6 4 4 3
;
run;

/* Add the paired difference that the test is run on. */
data work.Outburst2;
	set work.Outburst;
	diff = Sub - Full;
run;

/* The tests for location in the PROC UNIVARIATE output include the
   sign test for diff. */
proc univariate data=work.Outburst2;
	var diff;
run;

/*18.5 The Wilcoxon Signed-Ranks T Test (dependent/related)*/

/* Twelve (Before, Following) pairs. */
data work.Smoked;
	input Before Following @@;
	datalines;
23 20 12 16 11 10 15 0 25 5 20 8
11 0 9 15 13 8 15 13 30 12 21 0
;
run;

/* Add the paired difference that the test is run on. */
data work.Smoked2;
	set Smoked;
	diff = Before - Following;
run;

/* The tests for location in the PROC UNIVARIATE output include the
   signed-rank test for diff. */
proc univariate data=work.Smoked2;
	var diff;
run;

/*18.7 The Mann-Whitney U Test (independent)*/

/* (Shift, Score) pairs: five Day rows and five Night rows. */
data work.JobSat;
	input Shift $ Score @@;
	datalines;
Day 88 Day 72 Day 93 Day 67 Day 62 Night 24 Night 55 Night 70 Night 60 Night 50
;
run;

/* Two-group rank-sum comparison of Score between the shifts; the EXACT
   statement additionally requests the exact p-value. */
proc npar1way data=work.JobSat wilcoxon;
	var Score;
	class Shift;
	exact wilcoxon;
run;

/*18.9 The Kruskal-Wallis H Test*/

/* (Clip, Score) pairs: five rows for each of the clips A, B and C. */
data work.Driving;
	input Clip $ Score @@;
	datalines;
A 88 A 67 A 22 A 14 A 42
B 92 B 76 B 80 B 77 B 90
C 50 C 55 C 43 C 65 C 39
;
run;

/* With more than two groups the WILCOXON option produces the
   Kruskal-Wallis test. */
proc npar1way data=work.Driving wilcoxon;
	var Score;
	class Clip;
run;


/*18.11 The Friedman Test*/

/* Long-format data: one (Subject, Trimester, Number) triple for each of
   seven subjects in each of three trimesters. */
data work.Trimester;
	input Subject Trimester $ Number @@;
	datalines;
1 First 3 2 First 6 3 First 2 4 First 4 5 First 4 6 First 4 7 First 8
1 Second 5 2 Second 4 3 Second 0 4 Second 3 5 Second 6 6 Second 3 7 Second 6
1 Third 8 2 Third 7 3 Third 5 4 Third 2 5 Third 9 6 Third 7 7 Third 5
;
run;

/* Friedman test obtained from PROC FREQ: Subject is the stratifying
   variable, CMH2 with SCORES=RANK requests the rank-based
   Cochran-Mantel-Haenszel statistics, and NOPRINT suppresses the tables. */
proc freq data=work.Trimester;
	tables Subject*Trimester*Number / cmh2 scores=rank noprint;
run;