#Example of the use of the functions in IPprobit_lib #source("IPprobit_lib.R") n<- 20 #Sample size p<- 6 #total number of covariates (genes) #Simulating a design matrix set.seed(0) gene_expression<-matrix(runif(n*p)*4,nrow=n,ncol=p) solve(t(gene_expression)%*%gene_expression) #assume age is a clinical covariate. age<-sample(20:40,n,replace=TRUE) truth_betavector<- c(-0.1,-.01,1,-1,rep(0,p+2-4)) #truth_betavector<- c(-0.1, -.01, 1, 0, 0, 1, 0, 0) #NEW design <-cbind(1,age,gene_expression) ###Simulating from the simulation true model the ##the latent rv y and the 0-1 z y_tmp<- apply(design,1,function(xi){rnorm(n=1,xi%*%truth_betavector)}) y<- y_tmp[order(y_tmp)] x<- design[order(y_tmp),-c(1:2)]##Setting the zeros first real_probs <-pnorm(design%*%truth_betavector)[order(y_tmp)] #plot(x[,2],y) n0<- sum(y<0) #n0 <- 3 #NEW n1<- n-n0 if((n0==0) || (n1==0)) stop("all ones or all zeros in dataset!\n") z<-c(rep(0,n0),rep(1,n1)) ##Assume we have other columns that we do not care about mydata<-data.frame(cbind(z,y,real_probs,age,x)) names(mydata)<-c("z","y","prob","age",paste("GE",1:p,sep="")) mydata[,1:5] n_simulations <- 10 #It has to be greater than two keep <- 5 index_response <- 1 #the binary outcome indices_keep <- c(4) #the age will be in all models indices_analyze <- 5:(5+p-1) #indices were the GE is stored File <- "./bestexample_IPprobit.txt" k_max <- 5 #No model will include all the GE data VSel_start <-rep(0,length(indices_analyze)) VSel_start <-c(0,0,0,1,1,1) #I start the search #with a useless model mydata2 <- as.matrix(mydata) #system.time(temp<-model_search_less_than_k_max(VSel_start,index_response,indices_analyze,indices_keep,dataset=mydata,a=.2,keep=keep,nsim=n_simulations,k_max=k_max,File=File,saving_every=5,read_Keep=0)) #VSel_start<-temp[1,(1:p)] #My results: It selects the best model in less than 10 iterations #Total of possible models 2^6-1=63 #[1] "2 : 1,2 --> 3.28228803900062e-05" #[1] "3 : 1,2,4 --> 1.59325163019125e-05" #[1] "1 : 1 --> 1.09550200238722e-05" #[1] "2 : 2,5 --> 4.22107445345917e-06" #[1] "2 : 2,4 --> 3.70575630620589e-06" #[1] "saving results in ./bestexample_IPprobit.txt" ##I choose the model with the y information #source("linearREgFn01.R")#Here i redefine the functio score #temp<-model_search_less_than_k_max(VSel_start,index_response=2,indices_analyze,indices_keep,dataset=mydata,a=.2,keep=keep,nsim=n_simulations*2,k_max=k_max,File=File,saving_every=5,read_Keep=0)