Programming Examples of SPS and Matlab
SPS
Project Summary
The changes in glycated hemoglobin and blood glucose levels two hours after a meal, compared to baseline, were selected as efficacy indicators. The differences between drug groups with different concentrations and the placebo group were analyzed using t-tests and analysis of variance to evaluate the drug's effectiveness.
The incidence of hypoglycemia was used as a safety indicator, and the chi-square test was applied to compare group differences and confirm the drug's safety profile.
Linear regression analysis revealed that there was no significant linear relationship between age and the efficacy indicators.
Code
问题1(药物治疗差异分析):
/* 导入dm.sas7bdat数据集 */
data dm;
set "C:/Users/buzz/Desktop/期末/SAS/dm.sas7bdat";
run;
/* 导入lb.sas7bdat数据集 */
data lb;
set "C:/Users/buzz/Desktop/期末/SAS/lb.sas7bdat";
run;
/* 根据受试者唯一标识符连接dm和lb数据集 */
proc sql;
create table merged_data as
select a.USUBJID, a.ACTARMCD, b.LBTESTCD,
b.VISIT, b.LBORRES,b.LBBLFL
from dm as a
inner join lb as b
on a.USUBJID = b.USUBJID;
quit;
proc sort data=merged_data;
by USUBJID;
run;
/* 进行分析 */
data analysis_data;
set merged_data;
where LBTESTCD = "HBA1C";
retain baseline_hba1c;
by USUBJID;
if LBBLFL = '是' then baseline_hba1c = LBORRES;
if VISIT = '给药后第12周' then do;
hba1c_change = LBORRES - baseline_hba1c; /* 计算HbA1c变化 */
output;
end;
keep USUBJID ACTARMCD hba1c_change;
run;
data analysis_data;
set analysis_data;
if ACTARMCD = 'A' then ACTARMCD = 'B';
if ACTARMCD = 'C' then ACTARMCD = 'B';
run;
proc ttest data=analysis_data;
class ACTARMCD;
var hba1c_change;
run;
/* 根据B组筛选数据 */
data B_group;
set merged_data;
where (ACTARMCD = 'A'|ACTARMCD = 'C')& LBTESTCD = 'HBA1C';
if ACTARMCD = 'A'|ACTARMCD = 'C' then ACTARMCD = 'B';
keep USUBJID VISIT LBORRES LBBLFL;
run;
/* 合并基线和12周后的数据 */
data compare_data;
set B_group;
where LBBLFL = '是'|VISIT = '给药后第12周';
run;
/*EXCEL处理数据导入compare_clean_data*/
/*作图*/
proc gplot data=compare_clean_data;
symbol1 i=join v=none line=2 c=blue;
symbol2 i=join v=none line=1 c=red;
plot LBORRES1*USUBJID=1 LBORRES2*USUBJID=2 / overlay NOAXIS;
title '药物组治疗12周后HbA1c相对基线变化'
run;
问题2(药物有效性分析):
/* 导入lb.sas7bdat数据集 */
data lb;
set "C:/Users/buzz/Desktop/期末/SAS/lb.sas7bdat";
run;
/* 筛选餐后2小时血糖(2h-PPG)的数据 */
data lb1;
set lb;
where LBTPT= '餐后2小时';
run;
/* 导入sv.sas7bdat数据集 */
data sv;
set "C:/Users/buzz/Desktop/期末/SAS/sv.sas7bdat";
run;
/* 筛选出基线、第4周、第8周和第12周的访视数据 */
data sv1;
set sv;
if VISIT in ("基线期", "给药后第4周", "给药后第8周", "给药后第12周");
run;
/* 导入dm.sas7bdat数据集 */
data dm;
set "C:/Users/buzz/Desktop/期末/SAS/dm.sas7bdat";
run;
proc sql;
create table sv_lb_dm as
select a.USUBJID,a.VISIT,a.LBTESTCD,a.LBTPT, a.LBORRES, b.ACTARMCD
from lb1 as a inner join dm as b
on a.USUBJID=b.USUBJID; /*添加第一个on条件*/
quit;
data glucose;
set sv_lb_dm;
retain BASE_2HPPG W4_2HPPG W8_2HPPG W12_2HPPG;
if VISIT='导入期(-4天)' then BASE_2HPPG=LBORRES;
if VISIT='给药后第4周' then W4_2HPPG=LBORRES;
if VISIT='给药后第8周' then W8_2HPPG=LBORRES;
if VISIT='给药后第12周' then W12_2HPPG=LBORRES;
run;
data glucose_change;
set glucose;
CHANGE_2HPPG_W4=W4_2HPPG-BASE_2HPPG; /*计算餐后2小时血糖的变化量*/
CHANGE_2HPPG_W8=W8_2HPPG-BASE_2HPPG;
CHANGE_2HPPG_W12=W12_2HPPG-BASE_2HPPG;
run;
/* 第五步:使用PROC ANOVA过程对变化量进行方差分析,并进行多重比较,得到不同治疗组之间的差异和置信区间。 */
data glucose_change_clean;
set glucose_change;
where VISIT = '给药后第12周';
run;
proc anova data=glucose_change_clean;
class ACTARMCD;
model CHANGE_2HPPG_W4=ACTARMCD;
means ACTARMCD / tukey alpha=0.05;
run;
proc anova data=glucose_change_clean;
class ACTARMCD;
model CHANGE_2HPPG_W8=ACTARMCD;
means ACTARMCD / tukey alpha=0.05;
run;
proc anova data=glucose_change_clean;
class ACTARMCD;
model CHANGE_2HPPG_W12=ACTARMCD;
means ACTARMCD / tukey alpha=0.05;
run;
问题3(药物安全性评价):
data dm;
set "C:/Users/buzz/Desktop/期末/SAS/dm.sas7bdat";
run;
data ae;
set "C:/Users/buzz/Desktop/期末/SAS/ae.sas7bdat";
run;
proc sql;
create table merged_data_3 as
select a.USUBJID, a.ACTARMCD,
b.AECAT
from dm as a
inner join ae as b
on a.USUBJID = b.USUBJID;
quit;
proc sort data=merged_data_3;
by USUBJID;
run;
data low_blood_sugar_no_duplicates;
set merged_data_3;
where AECAT = "低血糖事件";
by USUBJID;
if first.USUBJID;
run;
proc freq data=dm noprint;
tables ACTARMCD / out=group_counts(keep=ACTARMCD COUNT);
run;
proc freq data=low_blood_sugar_no_duplicates noprint ;
tables ACTARMCD / out=low_blood_sugar_counts(keep=ACTARMCD COUNT);
run;
/*EXCEL处理数据 导入为merged_counts*/
proc print data=merged_counts;
var ACTARMCD COUNT1 COUNT2 ratio;
format ratio percent8.2;
title '每个组的低血糖人数和占比';
run;
问题4(药物治疗影响因素分析):
data dm;
set "C:/Users/buzz/Desktop/期末/SAS/dm.sas7bdat";
run;
data lb;
set "C:/Users/buzz/Desktop/期末/SAS/lb.sas7bdat";
run;
proc sql;
create table merged_data as
select a.USUBJID, a.ACTARMCD,a.AGE,b.LBTESTCD,
b.VISIT, b.LBORRES,b.LBBLFL
from dm as a
inner join lb as b
on a.USUBJID = b.USUBJID;
quit;
proc sort data=merged_data;
by USUBJID;
run;
/* 数据整理 */
data analysis_data;
set merged_data;
where LBTESTCD = "HBA1C";
retain baseline_hba1c;
by USUBJID;
if LBBLFL = '是' then baseline_hba1c = LBORRES; /* 将LBBLFL = '是'的数据作为基线HbA1c */
if VISIT = '给药后第12周' then do;
hba1c_change = LBORRES - baseline_hba1c; /* 计算HbA1c变化 */
output;
end;
keep USUBJID AGE hba1c_change;
run;
/* 执行线性回归分析 */
proc reg data=analysis_data;
model hba1c_change = AGE;
run;
MatLab
Project Summary
2022 Hangzhou Dianzi University 23rd Mathematical Modeling Competition
B: Data Hiding Technology
In practice, a lot of information can be simplified to only two different options, such as gender, whether to leave the province in the past 14 days, whether there is a family history of genetic diseases, whether the code is green, etc., study the two sets of binary data in the attachment to ensure that the information of each individual can be protected, give a solution with the least amount of hidden data, and establish a general mathematical model for privacy protection of binary data tables.
Code
%% Kmeans算法
% K 数据一共多少类
% iniCentriods初始聚类中心
% iterations 迭代次数
% Idx 返回的分类标号
% centroids 每一类的类簇中心
% Distance 类内总距离
function [Idx,centroids,Distance]=KMeans(data,K,iniCentriods,iterations)
[numOfData,numOfAttr]=size(data);%numOfData是数据个数,numOfAttr是数据维数
centroids=iniCentriods;
%% 迭代
for iter=1:iterations
pre_centroids=centroids; %上一次求得的中心位置
tags=zeros(numOfData,K);
%% 寻找最近中心,更新中心
for i=1:numOfData
D=zeros(1,K);% 每个数据点与每个聚类中心的标准差
Dist=D;
% 计算每个点到每个中心点的标准差
for j=1:K
Dist(j)=norm(data(i,:)-centroids(j,:),2);
end
[minDistance,index]=min(Dist);% 寻找距离最小的类别索引
tags(i,index)=1;% 标记最小距离所处的位置(类别)
end
%% 取均值更新聚类中心点
for i=1:K
if sum(tags(:,i))~=0
% 未出现空类,计算均值作为下一聚类中心
for j=1:numOfAttr
centroids(i,j)=sum(tags(:,i).*data(:,j))/sum(tags(:,i));
end
else % 如果出现空类,从数据集中随机选中一个点作为中心
randidx = randperm(size(data, 1));
centroids(i,:) = data(randidx(1),:);
tags(randidx,:)=0;
tags(randidx,i)=1;
end
end
if sum(norm(pre_centroids-centroids,2))<0.001 %不断迭代直到位置不再变化
break;
end
end
%% 计算输出结果
Distance=zeros(numOfData,1);
Idx=zeros(numOfData,1);
for i=1:numOfData
D=zeros(1,K);% 每个数据点与每个聚类中心的标准差
Dist=D;
% 计算每个点到每个中心点的标准差
for j=1:K
Dist(j)=norm(data(i,:)-centroids(j,:),2);
end
[distance,idx]=min(Dist);% 寻找距离最小的类别索引
distance=Dist(idx);
Distance(i)=distance;
Idx(i)=idx;
end
%Distance=sum(Distance,1);% 计算类内总距离
End
%%整体运算
Kbest=1
totalmin=2000
%%寻找最优K值,for循环(数据一K=1:15;数据二K=1:40)
for K=1:40
%% 产生随机初始点
[numOfData,numOfAttr]=size(data); % numOfData是数据个数,numOfAttr是数据维数
centroids=zeros(K,numOfAttr); % 随机初始化,最终迭代到每一类的中心位置
maxAttr=zeros(numOfAttr); % 每一维最大的数
minAttr=zeros(numOfAttr); % 每一维最小的数
for i=1:numOfAttr
maxAttr(i)=max(data(:,i)); % 每一维最大的数
minAttr(i)=min(data(:,i)); % 每一维最小的数
for j=1:K
centroids(j,i)=maxAttr(i)+(minAttr(i)-maxAttr(i))*rand();%随机初始化,选取每一维[min max]中初始化
end
end
[Idx,C,distance]=KMeans(data,K,centroids,500);% 调用KMeans
Distance=sum(distance)% 计算类内距离之和
total=0
%%计算“*”个数
for i=1:K
count1=0
j=0
count1=sum(C(i,:)==0)+sum(C(i,:)==1);%判断类簇中心每组中有多少0与1
j=sum(Idx==i)%%计算对应类的原始数据组数
if count1==numOfAttr
total=total+j
else
total=total+(numOfAttr-count1)*j %对“*”个数累计计数
end
end
%记录“*”个数最少的最优K
if total<totalmin
Kbest=K
totalmin=total
end
end