Merge branch 'master' of https://github.com/hagabbar/vitamin_b

hagabbar · Jul 31, 2020 · c62450b · c62450b
2 parents 49774cd + 40b802f
commit c62450b
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 237 deletions.
diff --git a/setup.py b/setup.py
@@ -64,14 +64,14 @@ def readfile(filename):
     return filecontents
 
 
-VERSION = '0.2.7'
+VERSION = '0.2.8'
 version_file = write_version_file(VERSION)
 long_description = get_long_description()
 
 
 setup(
     name='vitamin_b',
-    version='0.2.7',    
+    version='0.2.8',    
     description='A user-friendly machine learning Bayesian inference library',
     long_description=long_description,
     long_description_content_type='text/markdown',

diff --git a/vitamin_b/__init__.py b/vitamin_b/__init__.py
@@ -36,7 +36,7 @@
 
 from . import run_vitamin
 
-__version__ = "0.2.7"
+__version__ = "0.2.8"
 __author__ = 'Hunter Gabbard'
 __credits__ = 'University of Glasgow'
 
diff --git a/vitamin_b/models/CVAE_model.py b/vitamin_b/models/CVAE_model.py
@@ -676,8 +676,10 @@ def truncnorm(i,lp):    # we set up a function that adds the log-likelihoods and
                 plt.ylabel('cost')
                 plt.legend()
                 plt.savefig('%s/latest_%s/cost_%s.png' % (params['plot_dir'],params['run_label'],params['run_label']))
+                print('Saving unzoomed cost to -> %s/latest_%s/cost_%s.png' % (params['plot_dir'],params['run_label'],params['run_label']))
                 plt.ylim([np.min(np.array(plotdata)[-int(0.9*np.array(plotdata).shape[0]):,0]), np.max(np.array(plotdata)[-int(0.9*np.array(plotdata).shape[0]):,1])])
                 plt.savefig('%s/latest_%s/cost_zoom_%s.png' % (params['plot_dir'],params['run_label'],params['run_label']))
+                print('Saving zoomed cost to -> %s/latest_%s/cost_zoom_%s.png' % (params['plot_dir'],params['run_label'],params['run_label']))
                 plt.close('all')
 
             except:
@@ -707,7 +709,6 @@ def truncnorm(i,lp):    # we set up a function that adds the log-likelihoods and
                     # Save loss plot data
                     np.savetxt(save_dir.split('/')[0] + '/loss_data.txt', np.array(plotdata))
                 except FileNotFoundError as err:
-                    print(err)
                     pass
 
         if i % params['save_interval'] == 0 and i > 0:
@@ -767,7 +768,9 @@ def truncnorm(i,lp):    # we set up a function that adds the log-likelihoods and
 
 
                 plt.savefig('%s/corner_plot_%s_%d-%d.png' % (params['plot_dir'],params['run_label'],i,j))
+                print('Saved corner plot iteration %d to -> %s/corner_plot_%s_%d-%d.png' % (i,params['plot_dir'],params['run_label'],i,j))
                 plt.savefig('%s/latest_%s/corner_plot_%s_%d.png' % (params['plot_dir'],params['run_label'],params['run_label'],j))
+                print('Saved latest corner plot to -> %s/latest_%s/corner_plot_%s_%d.png' % (params['plot_dir'],params['run_label'],params['run_label'],j))
                 plt.close('all')
                 print('Made corner plot %d' % j)
 

diff --git a/vitamin_b/plotting.py b/vitamin_b/plotting.py
@@ -801,8 +801,9 @@ def plot_pp(self,model,sig_test,par_test,i_epoch,normscales,pos_test,bounds):
         for l in leg.legendHandles:
             l.set_alpha(1.0)
         plt.tight_layout()
-        fig.savefig('%s/pp_plot_%04d.png' % (self.params['plot_dir'],i_epoch),dpi=360)
+        #fig.savefig('%s/pp_plot_%04d.png' % (self.params['plot_dir'],i_epoch),dpi=360)
         fig.savefig('%s/latest_%s/latest_pp_plot.png' % (self.params['plot_dir'],self.params['run_label']),dpi=360)
+        print('Saved pp plot to -> %s/latest_%s/latest_pp_plot.png' % (self.params['plot_dir'],self.params['run_label']))
         plt.close(fig)
         # TODO add this back in
         hf.close()
@@ -832,11 +833,12 @@ def plot_loss(self):
         plt.legend()
         plt.tight_layout()
         plt.savefig('%s/latest_%s/cost_%s.png' % (self.params['plot_dir'],self.params['run_label'],self.params['run_label']),dpi=360)
+        print('Saved cost unzoomed plot to -> %s/latest_%s/cost_%s.png' % (self.params['plot_dir'],self.params['run_label'],self.params['run_label']))
         plt.ylim([np.min(np.array(plotdata)[-int(0.9*np.array(plotdata).shape[0]):,0]), np.max(np.array(plotdata)[-int(0.9*np.array(plotdata).shape[0]):,1])])
         plt.savefig('%s/latest_%s/cost_zoom_%s.png' % (self.params['plot_dir'],self.params['run_label'],self.params['run_label']),dpi=360)
+        print('Saved cost zoomed plot to -> %s/latest_%s/cost_zoom_%s.png' % (self.params['plot_dir'],self.params['run_label'],self.params['run_label']))
         plt.close('all')
 
-
         return
 
     def gen_kl_plots(self,model,sig_test,par_test,normscales,bounds,snrs_test):
@@ -1123,235 +1125,5 @@ def my_kde_bandwidth(obj, fac=1.0):
         fig_kl.savefig('%s/latest_%s/hist-kl.png' % (self.params['plot_dir'],self.params['run_label']),dpi=360)
         plt.close(fig_kl)
         hf.close()
-        return
-
-        """
-        # Compute dynesty with itself once
-        sampler1 = 'dynesty1'
-        sampler2 = 'dynesty1'
-        logbins = np.logspace(-3,2.5,50)
-        time_dict = {}
-
-        set1,time_dict[sampler1] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler1) 
-#        set2,time_dict[sampler2] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler2)
-        set2 = np.copy(set1)
-        tot_kl_std = []
-        tot_kl_mean = []
-        for r in range(self.params['r']**2):
-            std_kl, mean_kl = compute_kl(set1[r],set2[r],[sampler1,sampler2])
-            tot_kl_std.append(std_kl)
-            tot_kl_mean.append(mean_kl)
-            print(tot_kl_std[r])
-            print('Completed KL for set %s-%s and test sample %s' % (sampler1,sampler2,str(r)))
-        tot_kl_std = np.array(tot_kl_std)
-        tot_kl_mean = np.array(tot_kl_mean)
-
-        # linear plot
-        fig_kl, axis_kl = plt.subplots(1,1,figsize=(6,6))
-        axis_kl.hist(tot_kl_std,bins=50,histtype='stepfilled',density=True,color='grey')
-        axis_kl.set_xlabel(r'$\mathrm{KL-Statistic}$',fontsize=14)
-        axis_kl.set_ylabel(r'$p(\mathrm{KL})$',fontsize=14)
-        axis_kl.tick_params(axis="x", labelsize=14)
-        axis_kl.tick_params(axis="y", labelsize=14)
-        leg = axis_kl.legend(loc='upper right', fontsize=10) #'medium')
-        for l in leg.legendHandles:
-            l.set_alpha(1.0)
-
-        axis_kl.set_xlim(left=1e-2,right=1)
-        axis_kl.grid(False)
-        fig_kl.canvas.draw()
-        fig_kl.savefig('/home/hunter.gabbard/public_html/CBC/VItamin/dynesty_with_itself_linear.png')
-        plt.close()
-
-        # log plot
-        fig_kl, axis_kl = plt.subplots(1,1,figsize=(6,6))
-        axis_kl.hist(tot_kl_std,bins=logbins,histtype='stepfilled',density=True,color='grey')
-        axis_kl.set_xlabel(r'$\mathrm{KL-Statistic}$',fontsize=14)
-        axis_kl.set_ylabel(r'$p(\mathrm{KL})$',fontsize=14)
-        axis_kl.tick_params(axis="x", labelsize=14)
-        axis_kl.tick_params(axis="y", labelsize=14)
-        leg = axis_kl.legend(loc='upper right', fontsize=10) #'medium')
-        for l in leg.legendHandles:
-            l.set_alpha(1.0)
-
-        axis_kl.set_xlim(left=1e-2,right=1)
-        axis_kl.set_xscale('log')
-        axis_kl.set_yscale('log')
-        axis_kl.grid(False)
-        fig_kl.canvas.draw()
-        fig_kl.savefig('/home/hunter.gabbard/public_html/CBC/VItamin/dynesty_with_itself_log.png')
-        plt.close()
-
-        print()
-        print('Mean std KL is below:')
-        print(np.mean(tot_kl_std))
-        print()
-        print('Mean KL is below: ')
-        print(np.mean(tot_kl_mean))
-        print('Finished computing KL with itself')
-        exit()
-        """
-
-        tot_kl_grey = np.array([])
-        fig_kl, axis_kl = plt.subplots(1,1,figsize=(6,6))
-        time_dict = {}
-        # single plot KL approach 
-        for i in range(len(usesamps)):
-            for j in range(tmp_idx):
-
-                # Load appropriate test sets
-                if samplers[i] == samplers[::-1][j]:
-                    print_cnt+=1
-                    sampler1, sampler2 = samplers[i]+'1', samplers[::-1][j]+'1'
-
-                    # currently not doing KL of approaches with themselves, so skip here
-                    continue
-                    if self.params['load_plot_data'] == False:
-                        set1,time_dict[sampler1] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler1)
-                        set2,time_dict[sampler2] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler2)
-                else:
-                    sampler1, sampler2 = samplers[i]+'1', samplers[::-1][j]+'1'
-
-                    if self.params['load_plot_data'] == False:
-                        set1,time_dict[sampler1] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler1,vitamin_pred_made=vi_pred_made)
-                        set2,time_dict[sampler2] = self.load_test_set(model,sig_test,par_test,normscales,bounds,sampler=sampler2,vitamin_pred_made=vi_pred_made)
-
-                        # check if vitamin test posteriors were generated for the first time
-                        if sampler1 == 'vitamin1' and vi_pred_made == None:
-                            vi_pred_made = [set1,time_dict[sampler1]]
-                        elif sampler2 == 'vitamin1' and vi_pred_made == None:
-                            vi_pred_made = [set2,time_dict[sampler2]]
-
-                if self.params['load_plot_data'] == True:
-                    tot_kl = np.array(hf['%s-%s' % (sampler1,sampler2)])
-                else:
-
-                    # Iterate over test cases
-                    tot_kl = []  # total KL over all infered parameters
-                    indi_kl = np.zeros((self.params['r']**2,len(self.params['inf_pars']))) # KL for each individual paramter
-
-                    if self.params['make_indi_kl'] == True:
-                        for r in range(self.params['r']**2):
-                            indi_kl[r,:] = compute_kl(set1[r],set2[r],[sampler1,sampler2],one_D=True)
-                            print('Completed KL for set %s-%s and test sample %s' % (sampler1,sampler2,str(r)))
-                    for r in range(self.params['r']**2):
-                        tot_kl.append(compute_kl(set1[r],set2[r],[sampler1,sampler2]))
-                        print('Completed KL for set %s-%s and test sample %s' % (sampler1,sampler2,str(r)))
-                    tot_kl = np.array(tot_kl)
-
-                if self.params['load_plot_data'] == False:
-
-                    # Save results to h5py file
-                    hf.create_dataset('%s-%s' % (sampler1,sampler2), data=tot_kl)
-
-                logbins = np.logspace(-3,2.5,50)
-                logbins_indi = np.logspace(-3,3,50)
-                #logbins = 50
-
-                if samplers[i] == 'vitamin' or samplers[::-1][j] == 'vitamin':
-                    # print 10 worst and 10 best kl
-                    print(tot_kl.argsort()[-15:][::-1])
-                    print(np.sort(tot_kl)[-15:][::-1])
-                    print(tot_kl.argsort()[:15][:])
-                    print(np.sort(tot_kl)[:15][:])
-
-                    # plot vitamin kls
-                    axis_kl.hist(tot_kl,bins=logbins,alpha=0.5,histtype='stepfilled',density=True,color=CB_color_cycle[print_cnt],label=r'$\textrm{VItamin-%s}$' % (samplers[::-1][j]),zorder=2)
-                    axis_kl.hist(tot_kl,bins=logbins,histtype='step',density=True,facecolor='None',ls='-',lw=2,edgecolor=CB_color_cycle[print_cnt],zorder=10)
-
-                    if self.params['make_indi_kl'] == True:
-                        # plot indi vitamin kls
-                        for u in range(len(self.params['inf_pars'])):
-                            indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,alpha=0.5,histtype='stepfilled',density=True,color=CB_color_cycle[print_cnt],label=r'$\textrm{VItamin vs. %s}$' % (samplers[::-1][j]),zorder=2)
-                            indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,histtype='step',density=True,facecolor='None',ls='-',lw=0.5,edgecolor=CB_color_cycle[print_cnt],zorder=10)    
-                        print('Mean total KL vitamin vs bilby: %s' % str(np.mean(tot_kl)))
-
-                    # Return the mean KL if doing hyperparameter optimization
-                    if self.params['hyperparam_optim'] == True:
-                        return np.mean(tot_kl)
-                else:
-                    print(tot_kl.argsort()[-15:][::-1])
-                    print(np.sort(tot_kl)[-15:][::-1])
-                    print(tot_kl.argsort()[:15][:])
-                    print(np.sort(tot_kl)[:15][:]) 
-
-
-                    tot_kl_grey = np.append(tot_kl_grey,tot_kl)
-
-                    if label_idx == 0:
-
-                        if self.params['make_indi_kl'] == True:
-                            # plot indi bayesian kls
-                            for u in range(len(self.params['inf_pars'])):
-                                indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,alpha=0.8,histtype='stepfilled',density=True,color='grey',label=r'$\textrm{other samplers}$',zorder=1)
-
-                        label_idx += 1
-                    else:
-                        if self.params['make_indi_kl'] == True:
-                            # plot indi bayesian kls
-                            for u in range(len(self.params['inf_pars'])):
-                                indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,alpha=0.8,histtype='stepfilled',density=True,color='grey',zorder=1)
-
-                    if self.params['make_indi_kl'] == True:
-                        # plot indi bayesian kls
-                        for u in range(len(self.params['inf_pars'])):
-                            indi_axis_kl[u].hist(indi_kl[:,u],bins=logbins_indi,histtype='step',density=True,facecolor='None',ls='-',lw=0.2,edgecolor='grey',zorder=1)
-
-                    print('Mean total KL between bilby samps: %s' % str(np.mean(tot_kl)))
-
-                print('Completed KL calculation %d/%d' % (print_cnt,len(usesamps)*2))
-                print_cnt+=1
-
-            tmp_idx -= 1
-            if self.params['load_plot_data'] == False:
-                runtime[sampler1] = time_dict[sampler1]
-
-        # plot non indi bayesian kls
-        axis_kl.hist(tot_kl_grey,bins=logbins,alpha=0.8,histtype='stepfilled',density=True,color='grey',label=r'$\textrm{other samplers vs. other samplers}$',zorder=1)
-        # plot non indi bayesian kls
-        axis_kl.hist(tot_kl_grey,bins=logbins,histtype='step',density=True,facecolor='None',ls='-',lw=2,edgecolor='grey',zorder=1)
-
-        if self.params['load_plot_data'] == False:
-#            # Print sampler runtimes
-            for i in range(len(usesamps)):
-                hf.create_dataset('%s_runtime' % (samplers[i]), data=np.array(runtime[samplers[i]+'1']))
-                print('%s sampler runtimes: %s' % (samplers[i]+'1',str(runtime[samplers[i]+'1'])))
-
-
-
-        # Save KL histogram
-        axis_kl.set_xlabel(r'$\mathrm{KL-Statistic}$',fontsize=14)
-        axis_kl.set_ylabel(r'$p(\mathrm{KL})$',fontsize=14)
-        axis_kl.tick_params(axis="x", labelsize=14)
-        axis_kl.tick_params(axis="y", labelsize=14)
-        leg = axis_kl.legend(loc='upper right', fontsize=10) #'medium')
-        for l in leg.legendHandles: 
-            l.set_alpha(1.0)
-
-        axis_kl.set_xlim(left=8e-2,right=100)
-        axis_kl.set_xscale('log')
-        axis_kl.set_yscale('log')
-        axis_kl.grid(False)
-        fig_kl.canvas.draw()
-#        fig_kl.savefig('%s/latest_%s/hist-kl.png' % (self.params['plot_dir'],self.params['run_label']),dpi=360)
-        plt.close(fig_kl)
-
-        if self.params['make_indi_kl'] == True:
-            # save indi kl histogram
-            for u in range(len(self.params['inf_pars'])):
-                indi_axis_kl[u].set_xlabel(r'$\mathrm{%s}$' % self.params['inf_pars'][u],fontsize=5)
-                indi_axis_kl[u].tick_params(axis="x", labelsize=5)
-                indi_axis_kl[u].tick_params(axis="y", labelsize=5)
-
-                indi_axis_kl[u].set_xlim(left=1e-3)
-                indi_axis_kl[u].set_xscale('log')
-                indi_axis_kl[u].set_yscale('log')
-                indi_axis_kl[u].grid(False)
-            indi_axis_kl[7].set_visible(False)
-            indi_axis_kl[8].set_visible(False)
-            indi_fig_kl.canvas.draw()
-            indi_fig_kl.savefig('%s/latest_%s/hist-kl_individual_par.png' % (self.params['plot_dir'],self.params['run_label']),dpi=360)
-            plt.close(indi_fig_kl)
-
-        hf.close()
+        print('Saved KL plot to -> %s/latest_%s/hist-kl.png' % (self.params['plot_dir'],self.params['run_label']))
         return
diff --git a/vitamin_b/run_vitamin.py b/vitamin_b/run_vitamin.py
@@ -729,6 +729,7 @@ def train(params=params,bounds=bounds,fixed_vals=fixed_vals,resume_training=Fals
         plt.close('all')
         plot_convergence(search_result)
         plt.savefig('%s/latest_%s/hyperpar_convergence.png' % (params['plot_dir'],params['run_label']))
+        print('Saved hyperparameter convergence loss to -> %s/latest_%s/hyperpar_convergence.png' % (params['plot_dir'],params['run_label']))
         print('Did a hyperparameter search') 
 
     # train using user defined params
@@ -1049,6 +1050,7 @@ def test(params=params,bounds=bounds,fixed_vals=fixed_vals):
         plt.close()
         del figure
         print('Made corner plot: %s' % str(i+1))
+        print('Saved corner plot to -> %s/latest_%s/corner_plot_%s_%d.png' % (params['plot_dir'],params['run_label'],params['run_label'],i))
 
         # Store ML predictions for later plotting use
         VI_pred_all.append(VI_pred)
@@ -1199,6 +1201,7 @@ def gen_samples(params='params.txt',bounds='bounds.txt',fixed_vals='fixed_vals.t
         figure = corner.corner(samples[0,:,:],truths=x_data[0,:],labels=parnames)
         plt.savefig('./vitamin_example_corner.png')
         plt.close()
+        print('Saved corner plot to -> ./vitamin_example_corner.png')
 
     return samples, y_data_test, x_data