from scitbx import lbfgs
from cctbx.array_family import flex
import math,sys,os

factor_list= ['SKEW','CONTRAST','CORR_RMS','FLATNESS','REGIONS','CC_DENMOD','RFACTOR','TRUNCATE','FOM']

cc_min=0.0

best_single=None
best_single_cc=None
dict_t_dict={}
single_dict={}
for t in factor_list:
  dict_t={}
  cc_perf_list=flex.double()
  cc_calc_list=flex.double()
  for line in open(t+'_EST.list').readlines():
    spl=line.split()
    key=spl[4]
    cc_perf=float(spl[3])
    cc_calc=float(spl[1])
    dict_t[key]=[cc_perf,cc_calc]
    cc_perf_list.append(cc_perf)
    cc_calc_list.append(cc_calc)
  dict_t_dict[t]=(dict_t)
  c=flex.linear_correlation(cc_perf_list,cc_calc_list)
  cc=c.coefficient()
  print "CC: ",t,cc
  single_dict[t]=cc
  if best_single is None or cc>best_single_cc:
     best_single=t
     best_single_cc=cc

print "Best single: ",best_single,best_single_cc

pair_dict_1={}
print "Covariance MATRIX"
for t2 in factor_list: print t2,
print
print
for t1 in factor_list:
 pair_dict_2={}
 pair_dict_1[t1]=pair_dict_2
 print
 print t1,
 for t2 in factor_list:
   t1_delta_list=flex.double()
   t2_delta_list=flex.double()
   dict_t1=dict_t_dict[t1]
   dict_t2=dict_t_dict[t2]
   for key in  dict_t1.keys():
     if not key in dict_t2.keys(): continue
     t1_perf,t1_calc=dict_t1[key]
     t2_perf,t2_calc=dict_t2[key]
     if t1_perf != t2_perf: print "not same? ",key,t1,t2,t1_perf,t2_perf
     t1_delta_list.append(t1_calc-t1_perf) 
     t2_delta_list.append(t2_calc-t2_perf) 
   c=flex.linear_correlation(t1_delta_list,t2_delta_list)
   cc=c.coefficient()
   #print  "OVERALL CC: ",cc,t1,t2,len(t1_delta_list)
   print " %6.2f " %(cc),
   pair_dict_2[t2]=cc

# find subsets with minimal mutual covariance:
all_factor_dict={}
for t1 in factor_list:
 if t1 != best_single:
   all_factor_dict[t1]=True

best_subset=[best_single]

best_t=True
while best_t and len(all_factor_dict.keys()):
  lowest_cc=None
  best_t=None
  for t in all_factor_dict.keys():
    if single_dict[t]<cc_min: continue
    new_subset=best_subset+[t]
    high_for_new_subset=None
    for t1 in new_subset:
      for t2 in new_subset:
        if t1 is t2: continue
        if high_for_new_subset is None or \
               pair_dict_1[t1][t2]>high_for_new_subset:
          high_for_new_subset=pair_dict_1[t1][t2] 
    if lowest_cc is None or high_for_new_subset<lowest_cc:
      lowest_cc=high_for_new_subset
      best_t=t
  best_subset=best_subset+[best_t]
  if best_t in all_factor_dict.keys(): del all_factor_dict[best_t]
  print "\nNew best: ",best_subset,lowest_cc


