from phenix.substructure.hyss import patterson
from phenix.substructure.hyss import two_site_translation
from phenix.substructure.hyss import extrapolation_scan
from phenix.substructure.hyss import tangent_formula
from phenix.substructure.hyss import squaring
from phenix.substructure.hyss import structure_from_clusters
from phenix.substructure.hyss import p1_recycling
from phenix.substructure.hyss import minimization
from cctbx import euclidean_model_matching as emma
from cctbx import xray
from cctbx import maptbx
from cctbx import miller
from cctbx.array_family import flex
from phenix.substructure import shelxd
from scitbx.python_utils import dicts
from scitbx.python_utils.misc import store
from scitbx.python_utils import signal_utils
from libtbx.math_utils import iceil
from libtbx.utils import time_log
from libtbx import adopt_init_args
from libtbx.utils import Sorry
import sys

class substructure_parameters(object):
  """Number of sites and minimum-distance settings of a substructure.

  After construction the instance carries the attributes n_sites,
  min_distance, general_positions_only, min_distance_sym_equiv and
  min_cross_distance (stored through adopt_init_args, which is handed
  the local namespace of __init__).
  """

  def __init__(self, n_sites,
                     min_distance=None,
                     general_positions_only=False,
                     min_distance_sym_equiv=None,
                     min_cross_distance=None):
    """Validate the input and fill in the distances that were not given.

    n_sites must be integral and >= 1; it is stored as an int.
    At least one of min_distance, min_distance_sym_equiv and
    min_cross_distance must be given.  A missing min_distance_sym_equiv
    or min_cross_distance is taken from min_distance if that was given,
    otherwise from the other of the two.  min_distance is finally
    replaced by the smaller of min_distance_sym_equiv and
    min_cross_distance.

    Raises AssertionError if n_sites is invalid or no distance is given.
    """
    if (int(n_sites) != n_sites or int(n_sites) < 1):
      raise AssertionError(
        "n_sites must be an integral number and greater than zero.")
    n_sites = int(n_sites)
    # Explicit raise instead of an assert statement: asserts are removed
    # when Python runs with -O and the missing values would then only
    # surface in the min() call below.
    if (min_distance is None
        and min_distance_sym_equiv is None
        and min_cross_distance is None):
      raise AssertionError(
        "At least one of min_distance, min_distance_sym_equiv and"
        " min_cross_distance must be given.")
    if (min_distance_sym_equiv is None):
      if (min_distance is None):
        min_distance_sym_equiv = min_cross_distance
      else:
        min_distance_sym_equiv = min_distance
    if (min_cross_distance is None):
      if (min_distance is None):
        min_cross_distance = min_distance_sym_equiv
      else:
        min_cross_distance = min_distance
    min_distance = min(min_distance_sym_equiv, min_cross_distance)
    # NOTE: every local name defined above is passed on here; do not
    # introduce helper locals in this method.
    adopt_init_args(self, locals())

  def show(self,out=sys.stdout):
    """Print one "Substructure parameter" line per instance attribute."""
    for key in self.__dict__:
      print >>out, "Substructure parameter: %s:" % key, self.__dict__[key]

class search_parameters(object):
  """Tunable settings of the substructure search.

  The constructor arguments are stored as attributes of the same name
  (through adopt_init_args, which is handed the local namespace of
  __init__).  Settings left at None are filled in by compute_derived()
  once the number of sites is known.
  """

  def __init__(self, i_chunk=0,
                     n_chunk=1,
                     n_patterson_vectors=None,
                     n_fragments=None,
                     n_high_extrapolation_scan_correlation_group=0,
                     min_low_correlation=0.1,
                     enable_early_termination=True,
                     high_low_correlation_factor=2,
                     compare_sites_if_correlation_is_over=0.2,
                     max_relative_spread_top_correlations=0.1,
                     n_top_models_to_compare=None,
                     factor_top_n_matches=2/3.,
                     e_min=1.5,
                     real_space_squaring=None,
                     p1_n_recycling_cycles=None,
                     n_recycling_cycles=None,
                     site_recycling_candidate_factor=0.9,
                     site_recycling_use_factor=2/3.,
                     site_recycling_min_use=0,
                     site_recycling_use_heights_as_occupancies=None,
                     n_sites_final_factor=None,
                     occupancy_minimization=None,
                     minimize_consensus_model=None,
                     truncate_consensus_model=True,
                     cos_sin_table=True,
                     input_add_model=None,
                     input_emma_model_list=None,
                     score_only=False,
                     keep_seed_sites=0,
                     n_shake=0,
                     rms_shake=1.,
                     skip_optimization=False,
                     skip_consensus=False,
                     comparison_emma_model=None,
                     comparison_emma_model_tolerance=1.5,
                     dump_all_models=None,
                     dump_all_fragments=None,
                     verbose=False,
                     max_tries=None,
                     tries_to_skip=None,
                     cluster_termination=None,
                     max_view=10,
                     score_initial_solutions=False,
                     minimum_fragments_to_consider=None,
                     matches_must_not_share_patterson_vector=None,
                     random_search=False,
                     n_random_search=1000,
                     minimum_reflections_for_phaser=500,
                     high_resolution_cutoff=None, # Note:cutoff is done in hyss
                     seeds_to_use=None, # Note:seeds_to_use is done in hyss
                     unit_occupancy=None, # Note hyss
               ):
    """Store all settings and check the chunk selection.

    input_emma_model_list defaults to a new empty list for every
    instance (None stands for "no input models").

    Raises AssertionError unless n_chunk > 0 and 0 <= i_chunk < n_chunk.
    """
    # A list literal as default value would be one object shared by all
    # instances; create a fresh list per instance instead.
    if (input_emma_model_list is None):
      input_emma_model_list = []
    # NOTE: every local name defined above is passed on here; do not
    # introduce helper locals before this call.
    adopt_init_args(self, locals())
    # Explicit raises instead of assert statements, which are removed
    # when Python runs with -O.
    if not (n_chunk > 0):
      raise AssertionError("n_chunk must be greater than zero.")
    if not (0 <= i_chunk < n_chunk):
      raise AssertionError("i_chunk must satisfy 0 <= i_chunk < n_chunk.")

  def show(self,out=sys.stdout):
    """Print one "Search parameter" line per instance attribute."""
    for key in self.__dict__:
      print >>out, "Search parameter: %s:" % key, self.__dict__[key]

  def compute_derived(self, n_sites):
    """Fill in settings that depend on the number of sites.

    Attributes still at None receive a default chosen from n_sites:
    p1_n_recycling_cycles, n_recycling_cycles,
    site_recycling_use_heights_as_occupancies, n_patterson_vectors,
    n_shake, rms_shake, n_fragments, n_top_models_to_compare,
    n_sites_final_factor, occupancy_minimization and
    minimize_consensus_model.  Always (re)computed are
    n_candidate_sites, n_use_sites, min_top_n_matches and n_sites_final.
    """
    # Recycling defaults differ for small (< 10 sites) substructures.
    if (n_sites < 10):
      default_p1_cycles = 0
      default_cycles = 15
      default_heights_as_occupancies = True
    else:
      default_p1_cycles = 10
      default_cycles = 10
      default_heights_as_occupancies = False
    if (self.p1_n_recycling_cycles is None):
      self.p1_n_recycling_cycles = default_p1_cycles
    if (self.n_recycling_cycles is None):
      self.n_recycling_cycles = default_cycles
    if (self.site_recycling_use_heights_as_occupancies is None):
      self.site_recycling_use_heights_as_occupancies = (
        default_heights_as_occupancies)

    self.n_candidate_sites = iceil(
      n_sites * self.site_recycling_candidate_factor)
    self.n_use_sites = iceil(n_sites * self.site_recycling_use_factor)
    if (self.n_use_sites < self.site_recycling_min_use):
      # Below the requested minimum: use all sites.
      self.n_use_sites = n_sites
    if (self.n_candidate_sites < self.n_use_sites):
      # Keep the number of candidates above the number of sites used.
      self.n_candidate_sites = self.n_use_sites + 1
    if (self.n_patterson_vectors is None):
      self.n_patterson_vectors = min(100, max(33, n_sites//2))

    if (self.n_shake is None):
      self.n_shake=0
    if self.rms_shake is None:
      self.rms_shake=0.

    if (self.n_fragments is None):
      self.n_fragments = min(10, max(3, n_sites//20))
      if self.minimum_fragments_to_consider:
        self.n_fragments=max(self.n_fragments,
          self.minimum_fragments_to_consider)
    if (self.n_top_models_to_compare is None):
      if (n_sites < 5):
        if self.matches_must_not_share_patterson_vector:
          self.n_top_models_to_compare = 4
        else:
          self.n_top_models_to_compare = 5
      elif (n_sites < 10):
        if self.matches_must_not_share_patterson_vector:
          self.n_top_models_to_compare = 3
        else:
          self.n_top_models_to_compare = 4
      elif (n_sites < 20):
        # Same value with or without
        # matches_must_not_share_patterson_vector (never 2).
        self.n_top_models_to_compare = 3
      else:
        self.n_top_models_to_compare = 2
    self.min_top_n_matches = iceil(n_sites * self.factor_top_n_matches)
    if (self.n_sites_final_factor is None):
      self.n_sites_final_factor = max(1, 1.5-0.5*n_sites/30)
    self.n_sites_final = iceil(n_sites * self.n_sites_final_factor)
    if (self.occupancy_minimization is None):
      self.occupancy_minimization = (n_sites < 30)
    if (self.minimize_consensus_model is None):
      self.minimize_consensus_model = (n_sites < 30)

  def adjust_to_p1(self, space_group):
    """Fold the P1 recycling cycles into the regular ones for P1 data.

    If p1_n_recycling_cycles is positive and space_group.order_p() is 1,
    the cycles are added to n_recycling_cycles and
    p1_n_recycling_cycles is reset to 0.  A p1_n_recycling_cycles of
    None (not yet set by compute_derived) leaves everything unchanged.
    """
    p1_cycles = self.p1_n_recycling_cycles
    # Explicit None test: comparing None with 0 happens to be False in
    # Python 2 but is an error in Python 3.
    if (p1_cycles is not None and p1_cycles > 0
        and space_group.order_p() == 1):
      self.n_recycling_cycles += p1_cycles
      self.p1_n_recycling_cycles = 0

  def determine_peak_search_effective_resolution(self,
        n_sites,
        min_cross_distance,
        d_min):
    """Set peak_search_effective_resolution.

    The value is d_min if there are fewer than 20 sites and
    min_cross_distance is smaller than d_min, otherwise None.
    """
    if (n_sites < 20 and min_cross_distance < d_min):
      self.peak_search_effective_resolution = d_min
    else:
      self.peak_search_effective_resolution = None

# NOTE: intentionally left as a classic (old-style) class.  The search
# class in this file is declared as "class search(object, top_solutions)",
# which Python 2 only accepts while top_solutions does not itself derive
# from object.
class top_solutions:
  """Mixin that trims the result lists of a search to the best entries.

  Works on the attributes structures, final_scores, fully_recycled_flags
  and extrapolation_scan_scores of the object it is mixed into.
  """

  def get_top_solutions(self,max_structures=None):
    """Keep only the max_structures best-scoring results, best first.

    Results are ranked by structure.info.final_score, highest first;
    results with equal scores keep their current relative order and a
    final_score of None ranks below every other score.  The four result
    lists are replaced by new, consistently ordered lists.  If
    extrapolation_scan_scores does not have one entry per structure it
    is replaced by a list of None first.

    Nothing is changed if max_structures is None or 0.
    """
    if not max_structures: return
    n_structures = len(self.structures)
    if len(self.extrapolation_scan_scores) != n_structures:
      self.extrapolation_scan_scores = n_structures*[None]

    def rank(row):
      # Rank on the score only.  Sorting the complete rows would compare
      # the structure objects themselves whenever two scores are equal.
      final_score = row[0].info.final_score
      return (final_score is not None, final_score)

    rows = zip(
      self.structures,
      self.final_scores,
      self.fully_recycled_flags,
      self.extrapolation_scan_scores)
    best_rows = sorted(rows, key=rank, reverse=True)[:max_structures]
    self.structures = [row[0] for row in best_rows]
    self.final_scores = [row[1] for row in best_rows]
    self.fully_recycled_flags = [row[2] for row in best_rows]
    self.extrapolation_scan_scores = [row[3] for row in best_rows]

class subprocess_search_result(top_solutions):
  """Result lists (and output text) taken over from a search object.

  Holds structures, final_scores, fully_recycled_flags,
  extrapolation_scan_scores and output; results of further search
  objects can be merged in with combine().
  """

  def __init__(self,search_object,max_structures=None):
    """Take over the results of search_object.

    search_object: object providing the four result lists, or a false
      value (e.g. None) to start with empty lists.  Its output
      attribute is used if present, otherwise output is ''.
    max_structures: if set, only that many top solutions are kept.
    """
    self.max_structures=max_structures
    if search_object:
      # Copy the lists: combine() extends them in place and must not
      # modify the results held by search_object itself.
      self.structures=list(search_object.structures)
      self.final_scores=list(search_object.final_scores)
      self.fully_recycled_flags=list(search_object.fully_recycled_flags)
      self.extrapolation_scan_scores=list(
        search_object.extrapolation_scan_scores)
    else:
      self.structures=[]
      self.final_scores=[]
      self.fully_recycled_flags=[]
      self.extrapolation_scan_scores=[]
    self.output=getattr(search_object,'output','')

    if self.max_structures:  # take top N results
      self.get_top_solutions(max_structures=self.max_structures)

  def combine(self,search_object):  #add in
    """Append the results of search_object; None is ignored.

    A search_object without an output attribute contributes no output
    text, matching what __init__ accepts.
    """
    if search_object is None: return
    self.structures+=search_object.structures
    self.final_scores+=search_object.final_scores
    self.fully_recycled_flags+=search_object.fully_recycled_flags
    self.extrapolation_scan_scores+=search_object.extrapolation_scan_scores
    self.output+=getattr(search_object,'output','')

class search_manager (object) :
  """Callable running the search for one chunk from stored keywords."""

  def __init__ (self,kw,max_structures=20):
    """Remember the search keywords and the number of results to keep."""
    self.max_structures=max_structures
    self.kw=kw

  def __call__ (self, i_chunk=None) :
    """Run search() for chunk i_chunk and return its trimmed result.

    The stored keywords are deep-copied first, so setting i_chunk on the
    copied search_params does not alter the keywords kept by the manager.
    """
    from copy import deepcopy
    chunk_kw=deepcopy(self.kw)
    chunk_kw['search_params'].i_chunk=i_chunk
    return subprocess_search_result(
      search(**chunk_kw),
      max_structures=self.max_structures)

class sort_result:
  """Fragments together with their scores (parallel lists)."""

  def __init__(self,fragment_list=None,score_list=None):
    """Store the two lists; a missing list starts out as a new empty one.

    Lists passed in are stored as they are (not copied).
    """
    # List literals as default values would be shared by all instances
    # and extended in place by combine().
    if fragment_list is None: fragment_list=[]
    if score_list is None: score_list=[]
    self.fragment_list=fragment_list
    self.score_list=score_list

  def combine(self,sort_object):  #add in
    """Append the fragments and scores of sort_object to this result."""
    self.fragment_list+=sort_object.fragment_list
    self.score_list+=sort_object.score_list

def sorter(fragment_list=None,n_chunk=None,i_chunk=None,
      scoring=None,out=None):
  """Score the fragments that belong to one chunk.

  fragment_list: fragments carrying a model_number attribute; None is
    treated as an empty list.
  n_chunk, i_chunk: a fragment is processed only if
    model_number % n_chunk == i_chunk.
  scoring: object whose minimize(structure=...) returns an object with
    a score() method.
  out: stream for warnings; defaults to sys.stdout.

  Returns a sort_result with the successfully scored fragments and
  their scores, in input order.  A fragment whose minimize() call
  raises an Exception is reported on out and left out.
  """
  # The warning below needs a stream; there is no module-level "out".
  if out is None: out=sys.stdout
  if fragment_list is None: fragment_list=[]
  score_list=[]
  output_fragment_list=[]
  for fragment in fragment_list:
    if (fragment.model_number % n_chunk != i_chunk): continue
    try:
      score = scoring.minimize( structure = fragment)
    except Exception:
      # Best effort: a fragment that cannot be scored is skipped.
      print >>out,"Warning scoring failed ...carrying on. \n"
      continue
    score_list.append(score.score())
    output_fragment_list.append(fragment)
  return sort_result(fragment_list=output_fragment_list,score_list=score_list)

class sort_manager (object) :
  """Callable that scores one chunk of a stored fragment list."""

  def __init__ (self,n_chunk=None,fragment_list=None,scoring=None):
    """Remember the chunk count, the fragments and the scoring object."""
    self.scoring=scoring
    self.fragment_list=fragment_list
    self.n_chunk=n_chunk

  def __call__ (self, i_chunk=None) :
    """Return sorter()'s result for chunk i_chunk of the stored fragments."""
    chunk_result=sorter(
      fragment_list=self.fragment_list,
      n_chunk=self.n_chunk,
      i_chunk=i_chunk,
      scoring=self.scoring)
    return chunk_result


class search(object,top_solutions):

  def __init__(self, f_obs=None,
                     substructure_params=None,
                     search_params=None,
                     patterson_peak_search_params=None,
                     two_site_translation_params=None,
                     tangent_refinement_params=None,
                     squaring_params=None,
                     f_original = None,
                     scattering_type = None,
                     wavelength = None,
                     llgc_sigma = None,
                     rescore = "correlation",
                     extrapolation = "fast_nv1995",
                     nproc=None,
                     out=None,
                     write_to_text=False,
                     fragment_list=None,
                     previous_search_result=None,
                     return_unsorted_patterson_solutions=False,
                     replace_patterson_with_input_emma_models=False,
                     max_structures=None,
                     dummy_search=False,
                     ):

    kw=locals()  # keywords as they came in

    if out is None:
      if write_to_text:
        from cStringIO import StringIO
        out=StringIO()
      else:
        out=sys.stdout
    self.terminated_early=False
    self.top_matches_are_dummy_matches=None
    self.failed=False
    self.time_total = time_log("hyss total").start()

    self.n_id=10000
    # Incoming data
    self.f_obs = f_obs
    self.f_original = f_original
    self.scattering_type = scattering_type
    self.wavelength = wavelength
    self.previous_search_result = previous_search_result
    self.structures=[]

    # Derived data
    self.cb_op_niggli = self.f_obs.change_of_basis_op_to_niggli_cell()
    self.cb_op_niggli_inverse = self.cb_op_niggli.inverse()
    self.f_obs_niggli = (
        self.f_obs.change_basis( self.cb_op_niggli )
            .map_to_asu()
            .set_info("Primitive Niggli setting")
        )
    self.extra = {}

    if (search_params is None):
      search_params = search_parameters()

    if search_params.verbose:
      local_out=out
      self.need_to_close_local_out=False
    else:
     from libtbx.utils import null_out
     local_out=null_out()
     self.need_to_close_local_out=True # close out on return
    print >>local_out,"Hyss: data summary"
    self.f_obs.show_summary(f=local_out)
    print >>local_out,"Transformation to Niggli cell:", self.cb_op_niggli.c()
    print >>local_out,"Data on Niggli setting"
    self.f_obs_niggli.show_summary(f=local_out)


    self.search_params = search_params
    search_params.adjust_to_p1( self.f_obs_niggli.space_group() )
    search_params.compute_derived(n_sites=substructure_params.n_sites)

    print>>local_out, "hyss: using shelxd.q_value_estimation"
    if not self.f_obs_niggli.data().size():
      raise Sorry("Sorry, not enough reflections to run HySS (%d suitable reflections in range) " %(
     self.f_obs_niggli.data().size() ))
    self.structure_factors = search_structure_factors(
      q_all = shelxd.q_value_estimation( f_obs = self.f_obs_niggli ),
      e_min = search_params.e_min,
      )

    assert tangent_refinement_params is None or squaring_params is None
    substructure_params.show(out=local_out)
    self.substructure_params = substructure_params
    search_params.determine_peak_search_effective_resolution(
      n_sites=substructure_params.n_sites,
      min_cross_distance=substructure_params.min_cross_distance,
      d_min=self.structure_factors.q_large.d_min())
    search_params.show(out=local_out)
    out.flush()
    self.time_patterson_peaks = time_log("patterson_peaks").start()
    if (patterson_peak_search_params is None):
      patterson_peak_search_params = patterson.peak_search_parameters()
    patterson_peak_search_params.compute_derived(
      structure_min_distance=substructure_params.min_distance)
    patterson_peaks = patterson.peak_search(out=local_out,
      f_obs=self.structure_factors.q_all,
      substructure_params=substructure_params,
      peak_search_params=patterson_peak_search_params).all(
        max_clusters=search_params.n_patterson_vectors)
    self.time_patterson_peaks.stop()
    self.xray_scatterer = xray.scatterer( scattering_type = self.scattering_type)
    if (two_site_translation_params is None):
      two_site_translation_params = two_site_translation.parameters()
    two_site_search = two_site_translation.fast_nv1995(
      out=local_out,
      f_obs=self.structure_factors.q_all,
      substructure_params=substructure_params,
      xray_scatterer=self.xray_scatterer,
      params=two_site_translation_params)
    self.f_calculator = xray.structure_factors.from_scatterers(
      miller_set=self.structure_factors.q_all,
      cos_sin_table=search_params.cos_sin_table)
    self.crystal_gridding_tags = self.structure_factors.q_all.crystal_gridding(
      symmetry_flags=maptbx.use_space_group_symmetry).tags()
    self.peak_search_parameters = maptbx.peak_search_parameters(
      peak_search_level=1,
      interpolate=True,
      min_distance_sym_equiv=substructure_params.min_distance_sym_equiv,
      general_positions_only=substructure_params.general_positions_only,
      effective_resolution=search_params.peak_search_effective_resolution,
      min_cross_distance=substructure_params.min_cross_distance)

    try:
        extrapolation_scanner = get_extrapolation_object(
            hyss_search = self,
            extrapolation = extrapolation,
            out=local_out
            )

    except ValueError, e:
        raise Sorry, "Could not setup extrapolation: %s" % e

    if (tangent_refinement_params is None and squaring_params is None):
      if (search_params.real_space_squaring is None):
        if (substructure_params.n_sites < 100):
          tangent_refinement_params = tangent_formula.refinement_parameters()
        else:
          squaring_params = squaring.extrapolation_parameters()
      elif (search_params.real_space_squaring):
        squaring_params = squaring.extrapolation_parameters()
      else:
        tangent_refinement_params = tangent_formula.refinement_parameters()
    if (tangent_refinement_params is None):
      tangent_refinement = None
    else:
      tangent_refinement = tangent_formula.refinement(
        out=local_out,
        q_obs_large=self.structure_factors.q_large,
        n_sites=substructure_params.n_sites,
        params=tangent_refinement_params)
    if (squaring_params is None):
      squaring_extrapolation = None
    else:
      squaring_extrapolation = squaring.extrapolation(
        q_obs_large=self.structure_factors.q_large,
        n_sites=substructure_params.n_sites,
        params=squaring_params)
    if (search_params.p1_n_recycling_cycles > 0):
      p1_recycler = p1_recycling.recycler(
        substructure_params=substructure_params,
        search_params=search_params,
        xray_scatterer=self.xray_scatterer,
        structure_factors=self.structure_factors,
        tangent_refinement_params=tangent_refinement_params,
        squaring_params=squaring_params, out=local_out)

    # Get scoring and refinement objects
    try:
      self.scoring = get_scoring_object( hyss_search = self, rescore = rescore ,
        llgc_sigma = llgc_sigma, out=local_out)

    except Exception, e:
      print >>out, "\nNOTE: Could not set up Phaser scoring: \n%s" % e
      self.failed=True
      self.check_close_out(local_out)
      return

    try:
      self.refinement = get_refinement_object( hyss_search = self, rescore = rescore,out=out )

    except ValueError, e:
      raise Sorry, "Could not setup refinement: %s" % e

    self.time_two_site_search = time_log("two_site_search")
    self.time_p1_recycling = time_log("p1_recycling")
    self.time_extrapolation_scan = time_log("extrapolation_scan")
    self.time_compute_correlation = time_log("compute_correlation")
    self.time_q_large_calc = time_log("q_large_calc")
    self.time_tangent_refinement = time_log("tangent_refinement")
    self.time_squaring = time_log("squaring")
    self.time_fft_map = time_log("fft_map")
    self.time_peak_search = time_log("peak_search")
    self.time_random_omit = time_log("random_omit")
    self.time_minimization = time_log("minimization")
    self.time_euclidean_model_matching = time_log("euclidean_model_matching")
    self.time_extrapolation_scan_total = time_log("extrapolation_scan_total")
    self.time_recycling_total = time_log("recycling_total")

    self.set_up_checks(out=out)  # 2013-01-21 TT

    self.n_hesc = search_params.n_high_extrapolation_scan_correlation_group
    self.top_group = None
    self.top_matches = None
    self.top_matches_with_sufficient_sites = None
    self.consensus_model = None

    import random
    saved_random=random.getstate()  # so that we can match original hyss exactly

    if dummy_search: return

    if fragment_list:  # just use what was passed in
      if not self.search_params.score_only:
        pass #print >>out,"Starting with input fragment list"

    elif self.search_params.input_emma_model_list: #  we have input sites

      if self.search_params.input_add_model:  # we are going to add one by one
        new_emma_model_list=[]
        add_site_list=self.search_params.input_add_model.as_xray_structure().sites_frac()
        #print>>out, "Adding sites from input_add_model to input_emma_models"
        for emma_model in self.search_params.input_emma_model_list:
          for site in add_site_list:
            f=emma_model.as_xray_structure(self.xray_scatterer ).as_emma_model()
            from cctbx.euclidean_model_matching import position
            f.add_position(position(label="ATOM", site=site))
            new_emma_model_list.append(f)
        self.search_params.input_emma_model_list=new_emma_model_list


      fragment_list=[]
      #print>>out, "Using input model(s):"
      ii=-1
      for input_emma_model in self.search_params.input_emma_model_list:
        f=input_emma_model.as_xray_structure(self.xray_scatterer )
        f=f.change_basis(self.cb_op_niggli)  # 2013-11-26
        f.model_number=ii
        f.i_patterson_vector=ii
        f.i_fragment=0
        f.score_value=0
        f.set_b_iso(45.) 
        f.set_occupancies(1.0)
        fragment_list.append(f)
      if return_unsorted_patterson_solutions and \
           replace_patterson_with_input_emma_models:
        self.unsorted_patterson_solutions=self.shake_sites(
            fragment_list,out=out)
        self.check_close_out(local_out)
        return

    else:
      # Get all the two-site solutions up front and sort them
      #print>>out, "Getting two-site solutions from Patterson function"
      fragment_list=[]
      i_patterson_vector=-1
      ii=-1
      for patterson_vector in patterson_peaks.cluster_analysis.sites():
        i_patterson_vector+=1
        self.time_two_site_search.start()
        two_site_fragments = two_site_search(
          patterson_vector=patterson_vector,
          max_fragments=search_params.n_fragments)
        self.time_two_site_search.stop()
        i_fragment=-1
        for f in two_site_fragments:
          i_fragment+=1
          ff=f.customized_copy()
          ii+=1
          ff.model_number=ii
          ff.i_patterson_vector=i_patterson_vector
          ff.i_fragment=i_fragment
          ff.score_value=0
          fragment_list.append(ff)
      if return_unsorted_patterson_solutions:
        self.unsorted_patterson_solutions=self.shake_sites(
            fragment_list,out=out)
        self.check_close_out(local_out)
        return

    fragment_list=self.shake_sites(fragment_list,out=out)
    if search_params.score_initial_solutions:
      fragment_list=self.sort_fragments(kw=kw,scoring=self.scoring,
        fragment_list=fragment_list, out=out)

    self.structures = []
    self.final_scores = []
    self.fully_recycled_flags = []
    self.extrapolation_scan_scores = []

    if nproc and nproc > 1 and len(fragment_list)>1:
      self.run_as_subprocesses(kw=kw,fragment_list=fragment_list,
        max_structures=max_structures,out=out)
      self.check_close_out(local_out)
      return
    if self.previous_search_result:
      self.structures=self.previous_search_result.structures
      self.final_scores=self.previous_search_result.final_scores
      self.fully_recycled_flags=self.previous_search_result.fully_recycled_flags
      self.extrapolation_scan_scores=\
          self.previous_search_result.extrapolation_scan_scores
    # Beginning of work loop
    if not self.search_params.score_only: # don't print out for score_only
      print>>out
      print>>out, "Entering search loop:"
      print>>out
      print>>out, "p = peaklist index in Patterson map or model number"
      print>>out, "f = peaklist index in two-site translation function"
      print>>out, "ess = score after extrapolation scan"
      print>>out, "r = number of dual-space recycling cycles"
      print>>out, "score = final score"
      print>>out

      out.flush()

    keyboard_interrupt = signal_utils.keyboard_interrupt_handler()
    terminate_early_due_to_total_tries=False
    terminate_early = False
    terminate_due_to_keyboard_interrupt = False

    random.setstate(saved_random)
    n_tries_total=0
    for fragment in fragment_list:
      if (terminate_early): break
      if (self.sort_id(fragment) % search_params.n_chunk != search_params.i_chunk):
        continue

      n_tries_total+=1
      if self.search_params.tries_to_skip and \
            n_tries_total <= self.search_params.tries_to_skip:
          continue # just skip it

      if self.search_params.max_tries and self.search_params.max_tries > 0 and \
          n_tries_total > self.search_params.max_tries:
        terminate_early_due_to_total_tries=True
        terminate_early=True
        break

      elif self.search_params.score_only:  # just score and continue
        try:
          score = self.scoring.minimize( structure = fragment)
        except Exception,e:
          print >>out,"Warning scoring failed ...carrying on. \n"
          continue
        fragment.score_value=score.score()
        if self.search_params.unit_occupancy:
          occupancies=fragment.scatterers().extract_occupancies()
          new_occupancies=flex.double(len(occupancies)*[1.0])
          fragment.scatterers().set_occupancies(new_occupancies)

        self.note_fragment(fragment,out=out)

        fragment.info = dicts.easy(
          i_patterson_vector=fragment.i_patterson_vector,
          i_fragment=fragment.i_fragment,
          fragment=fragment,
          n_recycling_cycles=0,
          extrapolation_scan_structure=fragment,
          extrapolation_scan_score=score,
          final_score=score,
          )
        structure_original_symmetry = fragment.change_basis(
            self.cb_op_niggli_inverse )
        structure_original_symmetry.info=fragment.info
        structure_original_symmetry.model_number=fragment.model_number
        self.fully_recycled_flags.append( True )
        self.final_scores.append( score )
        self.structures.append(structure_original_symmetry)

        if self.search_params.dump_all_models or \
           self.search_params.comparison_emma_model or \
           not self.search_params.score_only: 
          self.update_top_group()
          self.show_structure_info(
            structure_original_symmetry.info,
            show_top_group = True,
            structure=structure_original_symmetry,
            fragment=fragment,
            out=out
             )
          self.euclidean_model_matching(out=out)
        terminate_early = self.determine_early_termination(out=out)

      else: # do the extrapolation
        # Start of extrapolation

        extrapolation_is_cheap = (  self.time_extrapolation_scan.average()
                                  < self.time_recycling_total.average())
        self.time_extrapolation_scan_total.start()
        self.time_extrapolation_scan.start()
        ( extrapolation_scan_structure, extrapolation_scan_score ) = (
            extrapolation_scanner.get_extrapolation_structure_and_score(
                fragment = fragment,
                n_sites = substructure_params.n_sites
                )
            )
        self.extrapolation_scan_scores.append( extrapolation_scan_score )
        self.time_extrapolation_scan.stop()
        structure = extrapolation_scan_structure

        self.time_extrapolation_scan_total.stop()
        i_recycling_cycle = 0
        if self.search_params.skip_optimization:
          tidied = sort_by_occupancy_and_tidy( fragment)
          try:
            score = self.scoring.minimize( structure = structure)
          except Exception,e:
            print >>out,"Warning scoring failed ...carrying on. \n"
            continue

          self.fully_recycled_flags.append( True)
          self.final_scores.append( score)

        elif (    extrapolation_is_cheap
            and not self.is_high_score( score = extrapolation_scan_score ) ):
          final_score = None
          self.fully_recycled_flags.append(False)

        else:
          self.time_recycling_total.start()
          while 1:
            # Start of recycling
            if (   i_recycling_cycle == 0
                and search_params.p1_n_recycling_cycles > 0):
              self.time_p1_recycling.start()
              q_large_extrapolated = p1_recycler(structure)
              self.time_p1_recycling.stop()
            else:
              self.time_q_large_calc.start()
              q_large_calc = self.f_calculator(
                xray_structure=structure,
                miller_set=self.structure_factors.q_large).f_calc()
              self.time_q_large_calc.stop()
              if (tangent_refinement is not None):
                self.time_tangent_refinement.start()
                q_large_extrapolated = tangent_refinement(
                  q_large_calc=q_large_calc)
                self.time_tangent_refinement.stop()
              else:
                self.time_squaring.start()
                q_large_extrapolated = squaring_extrapolation(
                  q_large_calc=q_large_calc)
                self.time_squaring.stop()
            self.time_fft_map.start()
            fft_map = miller.fft_map(
              crystal_gridding=self.crystal_gridding_tags,
              fourier_coefficients=q_large_extrapolated)
            self.time_fft_map.stop()
            self.time_peak_search.start()
            cluster_analysis = self.crystal_gridding_tags.peak_search(
              parameters=self.peak_search_parameters,
              map=fft_map.real_map()).all(
                max_clusters=search_params.n_candidate_sites)
            self.time_peak_search.stop()
            i_recycling_cycle += 1
            if (i_recycling_cycle >= search_params.n_recycling_cycles):
              break
            self.time_random_omit.start()
            structure = structure_from_clusters.random_omit(
                cluster_analysis=cluster_analysis,
                xray_scatterer=self.xray_scatterer,
                n_candidate_sites=search_params.n_candidate_sites,
                n_use_sites=search_params.n_use_sites,
                use_heights_as_occupancies
                  =search_params.site_recycling_use_heights_as_occupancies)
            if search_params.keep_seed_sites:
              # restore any seed sites that are gone now
              structure=self.restore_seed_sites(structure,seed_fragment=fragment,
                 tolerance=self.substructure_params.min_distance)
            self.time_random_omit.stop()
            # End of recycling

          self.time_recycling_total.stop()
          structure = structure_from_clusters.build(
            cluster_analysis=cluster_analysis,
            xray_scatterer=self.xray_scatterer,
            n_sites=search_params.n_sites_final)

          self.time_minimization.start()
          try:
            score = self.scoring.minimize( structure = structure )
          except Exception,e:
            print >>out,"Warning scoring failed ...carrying on. \n"
            continue

          self.time_minimization.stop()
          tidied = sort_by_occupancy_and_tidy( structure )
          self.fully_recycled_flags.append( True )
          self.final_scores.append( score )



        structure_original_symmetry = tidied.change_basis(
           self.cb_op_niggli_inverse )
        structure_original_symmetry.info = dicts.easy(
          i_patterson_vector=fragment.i_patterson_vector,
          i_fragment=fragment.i_fragment,
          fragment=fragment.change_basis( self.cb_op_niggli_inverse ),
          extrapolation_scan_structure=extrapolation_scan_structure
            .change_basis( self.cb_op_niggli_inverse ),
          extrapolation_scan_score=extrapolation_scan_score,
          n_recycling_cycles=i_recycling_cycle,
          final_score=score,
          )
        structure_original_symmetry.model_number=fragment.model_number
        self.structures.append( structure_original_symmetry )

        self.update_top_group()
        self.show_structure_info(
          structure_original_symmetry.info,
          show_top_group = True,
          structure=structure_original_symmetry,
          fragment=fragment,
          out=out
          )
        self.euclidean_model_matching(out=out)
        terminate_early = self.determine_early_termination(out=out)
        if (keyboard_interrupt.n_events > 0):
          if (not terminate_early):
            terminate_due_to_keyboard_interrupt = True
          terminate_early = True
        if self.search_params.skip_optimization and \
            self.search_params.enable_early_termination:
          terminate_early = True
        # End of extrapolation

    best_consensus_only = False
    if (self.top_matches is None or self.top_matches.n_matches() == 0):
      self.top_matches = self.top_matches_with_sufficient_sites
      best_consensus_only = (self.top_matches is not None)
    if (self.top_matches is not None):
      self.show_top_matches(best_consensus_only,out=out)
    self.minimize_consensus_model( n_sites = substructure_params.n_sites ,
        out=out)

    if (terminate_early):
      if not terminate_early_due_to_total_tries:
        self.terminated_early=True
        print >>out, "\nDone...early termination based on matching solutions.\n"
      if (terminate_due_to_keyboard_interrupt):
        print>>out, "Search terminated due to keyboard interrupt."
        print>>out
      else:
        print>>out, "Search finished."
        print>>out
    if search_params.verbose:
      print>>out, time_log.legend
      print>>out, self.time_patterson_peaks.report()
      print>>out, self.time_two_site_search.report()
      print>>out, self.time_extrapolation_scan.report()
      print>>out, self.time_p1_recycling.report()
      if (search_params.p1_n_recycling_cycles > 0):
        p1_recycler.show_times(out=out)
      print>>out, self.time_compute_correlation.report()
      print>>out, self.time_q_large_calc.report()
      print>>out, self.time_tangent_refinement.report()
      print>>out, self.time_squaring.report()
      print>>out, self.time_fft_map.report()
      print>>out, self.time_peak_search.report()
      print>>out, self.time_random_omit.report()
      print>>out, self.time_minimization.report()
      print>>out, self.time_euclidean_model_matching.report()
      print>>out, self.time_total.log()
      print>>out, "time unaccounted for: %.2f" % (
        self.time_total.accumulation
        - self.time_patterson_peaks.accumulation
        - self.time_two_site_search.accumulation
        - self.time_extrapolation_scan.accumulation
        - self.time_p1_recycling.accumulation
        - self.time_compute_correlation.accumulation
        - self.time_q_large_calc.accumulation
        - self.time_tangent_refinement.accumulation
        - self.time_squaring.accumulation
        - self.time_fft_map.accumulation
        - self.time_peak_search.accumulation
        - self.time_random_omit.accumulation
        - self.time_minimization.accumulation
        - self.time_euclidean_model_matching.accumulation)
    out.flush()
    keyboard_interrupt.disable()
    if write_to_text:
      self.output=out.getvalue()  # save output
    self.check_close_out(local_out)

  def check_close_out(self,local_out):
    """
    Close local_out, but only when self.need_to_close_local_out is set.
    """
    if not self.need_to_close_local_out:
      return
    local_out.close()

  def shake_sites(self,fragment_list,out=sys.stdout):
    """
    Return fragment_list augmented with randomly shaken copies.

    When search_params.n_shake is not set the input list is returned
    unchanged. Otherwise each fragment is followed by n_shake copies made
    with self.shake(); every copy inherits i_patterson_vector and
    i_fragment from its parent, gets score_value 0 and a model_number
    offset from the parent's by a multiple of len(fragment_list) + 10.
    """
    params = self.search_params
    if not params.n_shake:
      return fragment_list

    stride = len(fragment_list) + 10
    augmented = []
    for parent in fragment_list:
      augmented.append(parent)
      for i_copy in xrange(1, params.n_shake + 1):
        shaken = self.shake(struct=parent, rms=params.rms_shake)
        shaken.model_number = parent.model_number + i_copy*stride
        shaken.i_patterson_vector = parent.i_patterson_vector
        shaken.i_fragment = parent.i_fragment
        shaken.score_value = 0
        augmented.append(shaken)
    if params.n_shake > 0:
      print>>out, "Total of %d shake models with rms of %7.2f" %(
         len(augmented),params.rms_shake)
    return augmented

  def sort_id(self,fragment):
    """
    Return fragment.sort_id when the fragment has one, otherwise its
    model_number.
    """
    return (fragment.sort_id if hasattr(fragment,'sort_id')
            else fragment.model_number)


  def run_as_subprocesses(self,kw={},fragment_list=None,
     max_structures=None,out=sys.stdout):
    """
    Run the search in nproc sub-processes and merge the results into self.

    kw: keyword dictionary for the search; must contain 'nproc',
      'search_params', 'self' and 'out'. NOTE: it is modified in place
      ('self', 'nproc' and 'out' are deleted, 'write_to_text' is set and,
      when fragment_list is given, 'fragment_list' and
      'previous_search_result' are set too).
    fragment_list: optional pre-computed fragments handed to the workers;
      when given, initial scoring and shaking are switched off in
      kw['search_params'].
    max_structures: passed through to search_manager.
    out: stream for progress and summary output.

    Returns self, with structures, fully_recycled_flags and final_scores
    taken from the combined result. Unless search_params.score_only is
    set, the top group, model matching, consensus model and
    terminated_early flag are updated as well.
    """
    nproc=kw['nproc']
    search_params=kw['search_params']
    search_params.n_chunk=nproc

    method=kw.get('queue_name','multiprocessing')
    qsub_command=kw.get('qsub_command')

    #print >>out,"\nRunning %d %s sub-processes now...\n" %(nproc,method)
    for x in ['self','nproc','out']: del kw[x] # so it is not recursive
    # NOTE: search(**kw)  would run normally

    if fragment_list:
      kw['fragment_list']=fragment_list
      kw['search_params'].score_initial_solutions=False #already set up
      kw['search_params'].n_shake=None#already set up
      kw['previous_search_result']=None #already set up

    sys_stdout_sav=sys.stdout
    try:
      if self.search_params.verbose:
        kw['write_to_text']=True
      else:
        kw['write_to_text']=None
        from libtbx.utils import null_out
        sys.stdout=null_out()

      search_func=search_manager(kw,max_structures=max_structures)
      from libtbx.easy_mp import parallel_map, pool_map
      if (method == "multiprocessing") and (sys.platform != "win32") :
        subprocess_search_results = pool_map(
          func=search_func,
          iterable=xrange(nproc),
          processes=nproc)
      else :
        subprocess_search_results=parallel_map(
          func=search_func,
          iterable=xrange(nproc),
          method=method,
          processes=nproc,
          callback=None,
          qsub_command=qsub_command,
          use_manager=(sys.platform == "win32"))
    finally:
      # always restore stdout, even when a worker or the pool raises;
      # otherwise the process would keep writing to null_out()
      sys.stdout=sys_stdout_sav

    # save the results to pass back to hyss:

    if self.previous_search_result:
      overall_result=self.previous_search_result
      others=subprocess_search_results
    elif subprocess_search_results:
      overall_result=subprocess_search_results[0]
      others=subprocess_search_results[1:]
    else:
      return self # nothing there

    for subprocess_search_result in others:
      overall_result.combine(subprocess_search_result)

    if self.search_params.verbose:
      print >>out, overall_result.output
    self.structures=overall_result.structures
    self.fully_recycled_flags=overall_result.fully_recycled_flags
    self.final_scores=overall_result.final_scores

    # now get consensus and return
    print >>out, "Done with this analysis"
    out.flush()
    if self.search_params.score_only:  # just quit
      return self

    print >>out,"...getting top scores from %d structures\n" %(
      len(self.structures))
    out.flush()

    # ascending sort, so the best-scoring structures end up last
    sort_list=[]
    for structure in self.structures:
      sort_list.append([structure.info.final_score,structure])
    sort_list.sort()
    if len(sort_list) > self.search_params.max_view:
      print >>out,"Showing top %d structures" %(self.search_params.max_view)

    for score,structure in sort_list[-self.search_params.max_view:]:
      self.show_structure_info(
        structure.info,
        show_top_group = False,
        structure=structure,
        fragment=None,
         out=out
        )
    print >>out
    self.update_top_group()
    print >>out
    self.euclidean_model_matching(out=out)
    best_consensus_only = False
    if (self.top_matches is None or self.top_matches.n_matches() == 0):
      # fall back to the last matches that had enough sites in common
      self.top_matches = self.top_matches_with_sufficient_sites
      best_consensus_only = (self.top_matches is not None)

    if (self.top_matches is not None and
          not self.top_matches_are_dummy_matches):
      self.show_top_matches(best_consensus_only,out=out)
    self.minimize_consensus_model(
         n_sites = self.substructure_params.n_sites ,out=out)

    # see if we should terminate:
    if self.determine_early_termination(out=out):
      print >>out, "\nDone...early termination based on matching solutions.\n"
      self.terminated_early=True
    return self

  def restore_seed_sites(self,structure,seed_fragment=None,
     tolerance=3.0):
    """
    Put back any sites of seed_fragment that are missing from structure.

    A seed site counts as present when some site of structure lies within
    tolerance of one of its symmetry equivalents (distance as returned by
    sgtbx.min_sym_equiv_distance_info). If at least one seed site is
    missing, the site list of structure is rebuilt in place: missing seed
    sites first, then the sites that matched a seed, then the remaining
    original sites, without duplicates and truncated to the original
    number of sites. Returns structure.
    """
    from cctbx import sgtbx
    # this can be done once above...
    settings = structure.crystal_symmetry().special_position_settings(
      min_distance_sym_equiv=0.5)
    current_sites = structure.sites_frac()
    current_as_list = list(current_sites)

    missing_seeds = []
    matched_sites = []
    for seed_site in seed_fragment.sites_frac():
      seed_equivalents = settings.sym_equiv_sites(site=seed_site)
      for candidate in current_sites:
        dist_info = sgtbx.min_sym_equiv_distance_info(
          reference_sites=seed_equivalents, other=candidate)
        if dist_info.dist() <= tolerance:
          if candidate not in matched_sites:
            matched_sites.append(candidate)
          break
      else:
        # no site of structure is close to this seed site
        missing_seeds.append(seed_site)

    if missing_seeds: # we need to add some back in. Don't delete matches
      n_sites = len(current_sites)
      rebuilt = []
      for site in missing_seeds + matched_sites + current_as_list:
        if len(rebuilt) >= n_sites:
          break
        if site not in rebuilt:
          rebuilt.append(site)
      structure.set_sites_frac(flex.vec3_double(tuple(rebuilt)))
    return structure

  def shake(self,struct=None,rms=1.):
    """
    Return a customized_copy() of struct in which Gaussian noise with
    standard deviation rms has been added to every fractional coordinate.
    """
    import random
    shaken = struct.customized_copy()
    perturbed = flex.vec3_double()
    for site in shaken.sites_frac():
      # the noise is applied directly to the fractional coordinates
      perturbed.append(tuple([x + random.gauss(0., rms) for x in site]))
    shaken.set_sites_frac(perturbed)
    return shaken


  def dump_fragment(self,fragment,index=0,
    info_text=None,prefix="dump",verbose=True,
     try_again_on_failure=True,out=sys.stdout):
    """
    Write fragment as a PDB file named <prefix>_<info_text>.pdb, or
    <prefix>_<index>.pdb when info_text is not given.

    The fragment is converted with self.cb_op_niggli_inverse first; if that
    fails it is written as is and a message is printed. If writing fails
    the call is repeated once after a one-second wait
    (try_again_on_failure); a second failure is only reported on out.
    """
    try:
      standard_basis = fragment.change_basis( self.cb_op_niggli_inverse )
    except Exception:
      standard_basis = fragment
      print >>out, "Failed to convert to standard basis?"
    xray_scatterer=xray.scatterer( scattering_type = self.scattering_type)
    try:
      if info_text:
        f=open('%s_%s.pdb' %(prefix,info_text),'w')
      else:
        f=open('%s_%d.pdb' %(prefix,index),'w')
      try:
        print >>f, standard_basis.as_emma_model().as_xray_structure(xray_scatterer).as_pdb_file()
      finally:
        # close the handle even when the conversion or the write fails
        f.close()
      if verbose:
        print >>out, " -> %s" %(f.name)
    except Exception:  # try again one more time after short wait..
      if try_again_on_failure:
        import time
        time.sleep(1)
        self.dump_fragment(fragment,index=index,info_text=info_text,
          prefix=prefix,verbose=verbose,try_again_on_failure=False,out=out)
      else:
        print >>out, "Failed to write fragment...%d" %(fragment.model_number)


  def run_score_as_subprocesses(self,kw={},scoring=None,
      fragment_list=[],out=sys.stdout):
    """
    Score fragment_list in kw['nproc'] sub-processes.

    Sets kw['search_params'].n_chunk to nproc, runs one sort_manager job
    per chunk index through libtbx.easy_mp.parallel_map and combines the
    per-chunk results. Returns (fragment_list, score_list) of the combined
    result.
    """
    nproc = kw['nproc']
    kw['search_params'].n_chunk = nproc

    method = kw.get('queue_name','multiprocessing')
    qsub_command = kw.get('qsub_command')

    print >>out,"\nRunning %d %s scoring sub-processes now...\n" %(nproc,method)

    worker = sort_manager(
      fragment_list=fragment_list,
      n_chunk=nproc,
      scoring=scoring)
    from libtbx.easy_mp import parallel_map

    partial_results = parallel_map(
      func=worker,
      iterable=xrange(nproc),
      method=method,
      processes=nproc,
      callback=None,
      qsub_command=qsub_command,
      use_manager=False)

    # fold all chunks into the first one
    combined = partial_results[0]
    for extra in partial_results[1:]:
      combined.combine(extra)
    return combined.fragment_list,combined.score_list

  def sort_fragments(self,kw={},scoring=None,
       fragment_list=[],out=sys.stdout): # TT 2013-01-26
    """
    Score the initial fragments and return them sorted by score.

    With kw['nproc'] > 1 the scoring is delegated to
    run_score_as_subprocesses(); otherwise only the fragments whose
    sort_id falls into this process' chunk (search_params.i_chunk of
    search_params.n_chunk) are scored with scoring.minimize(). Fragments
    whose scoring raises are dropped with a warning. The sorted fragments
    are renumbered (model_number = rank) and listed on out.
    """
    print >>out, "Scoring and sorting initial fragments\n"
    nproc = kw.get('nproc')
    if nproc and nproc > 1: # run as subprocesses
      scored_fragments,score_values = self.run_score_as_subprocesses(
        kw=kw,scoring=scoring,
        fragment_list=fragment_list,out=out)
    else:
      search_params = kw.get('search_params') # so we can pass in via kw
      scored_fragments = []
      score_values = []
      for fragment in fragment_list:
        i_chunk = self.sort_id(fragment) % search_params.n_chunk
        if i_chunk != search_params.i_chunk:
          continue # belongs to another chunk
        try:
          score = scoring.minimize( structure = fragment)
        except Exception:
          print >>out,"Warning scoring failed ...carrying on. \n"
          continue
        score_values.append(score.score())
        scored_fragments.append(fragment)

    from phenix.substructure.combine_substructures import sort_follow
    ranked = sort_follow(scored_fragments,score_values,
      append_score_value=True,reverse=True)

    print >>out, "\nList of 2-site solutions:"
    for rank,fragment in enumerate(ranked):
      fragment.model_number = rank
      self.note_fragment(fragment,out=out)
    print >>out
    return ranked

  def note_fragment(self,fragment,out=sys.stdout):
    """
    Report one fragment on out.

    Depending on search_params: prints rank, Patterson vector, fragment
    index, number of sites and score (score_initial_solutions); prints
    the number of sites matching the comparison models
    (comparison_emma_model); writes the fragment to a PDB file
    (dump_all_fragments).
    """
    params = self.search_params
    if params.score_initial_solutions:
      rank_text = "Rank: %d Patterson vector:%03d Fragment:%03d " %(
        fragment.model_number,
        fragment.i_patterson_vector,
        fragment.i_fragment)
      site_text = "sites:%03d score:%8.3f " %(
        len(fragment.sites_frac()),fragment.score_value)
      # trailing comma: the line is continued by the output below
      print >>out, rank_text + site_text ,
    if params.comparison_emma_model:
      n_list = self.match_to_comparison(
        fragment.as_emma_model(),
        title='START',
        display=False,
        tolerance=params.comparison_emma_model_tolerance,
        out=out)
      print >>out, "Matching sites: %s" %(str(n_list))
    if params.dump_all_fragments:
      self.dump_fragment(
        fragment,
        index=fragment.model_number,
        prefix="fragment",
        verbose=True,
        out=out)

  def match_to_comparison(self,emma_model,title="",display=True,
    tolerance=None,out=sys.stdout):
    """
    Count the sites of emma_model that match each comparison model.

    emma_model: model to check against search_params.comparison_emma_model.
    title: label used in the optional summary line.
    display: when true, print "<title> <counts> of <n sites>" on out.
    tolerance: matching tolerance passed to emma.model_matches; defaults
      to 0.99 * substructure_params.min_distance.

    Returns a list with n_pairs_best_match() for every comparison model,
    followed by the sum of all these counts except the first.
    """
    if tolerance is None:
      tolerance=0.99 * self.substructure_params.min_distance
    n_list=[]
    for comparison in self.search_params.comparison_emma_model:
      if not comparison.unit_cell().is_similar_to(emma_model.unit_cell()):
        # convert it 2014-02-02 XXX do this just once...
        comparison=(comparison.as_xray_structure( self.xray_scatterer ).
         change_basis( self.cb_op_niggli )).as_emma_model()
      comparison_top_matches = emma.model_matches(
        model1 = emma_model,
        model2 = comparison,
        tolerance = tolerance,
        models_are_diffraction_index_equivalent = True,
        break_if_match_with_no_singles=True
        )
      n_list.append(comparison_top_matches.n_pairs_best_match())

    # last entry: total over all comparison models but the first
    n_list.append(sum(n_list[1:]))
    if display:
      # NOTE(review): the original referenced an undefined name here
      # (NameError); the number of sites in the checked model is assumed
      # to be what was meant - confirm.
      m=len(emma_model.positions())
      print >>out, "%s %s of %d" %(title,str(n_list),m)
    return n_list


  def set_up_checks(self,out=sys.stdout):
    """
    Print a note on out for every special search option that is switched
    on in search_params (skip_optimization, verbose, max_tries,
    tries_to_skip, random_search).
    """
    params = self.search_params

    if params.skip_optimization:
      print >>out, "Skipping optimization"
    if params.verbose:
      print >>out, "Verbose output"
    max_tries = params.max_tries
    if max_tries and max_tries>0:
      print >>out, "\nStopping after %d evaluations " %(max_tries)
    if params.tries_to_skip:
      print >>out, "Skipping first %d evaluations " %(params.tries_to_skip)
    if params.random_search:
      print >>out, "Using random search instead of patterson function with %d tries" %(
         params.n_random_search)

  def show_structure_info(self, info, show_top_group=False, structure=None,
     fragment=None,out=sys.stdout):
    """
    Print a one-line summary of a search result on out.

    info: record with i_patterson_vector, i_fragment,
      extrapolation_scan_score, n_recycling_cycles and final_score.
    show_top_group: also list the scores of the current top group.
    structure: when given and comparison models are defined, the number of
      matching sites is appended; it is also the model that gets dumped
      when search_params.dump_all_models is set.
    fragment: when it carries a model_number, that number is shown and
      added to the dump file name.
    """
    best_scores = ""
    if show_top_group and self.top_group is not None:
      score_texts = [ str( s ) for s in self.top_group.scores ]
      best_scores = " [ best score: %s ]" % " ".join( score_texts )

    matching = ""
    if structure and self.search_params.comparison_emma_model:
      n_list = self.match_to_comparison(
        structure.as_emma_model(),
        title='CHECK',
        tolerance=self.search_params.comparison_emma_model_tolerance,
        display=False,
        out=out)
      matching = " Matching sites: %s" %(str(n_list))

    info_text = "%d_%d" %(info.i_patterson_vector,info.i_fragment)
    model_text = ""
    if fragment and hasattr(fragment,'model_number'):
      info_text = "%s_%d" %(info_text,fragment.model_number)
      model_text = " model %d " %(fragment.model_number)

    summary = "p=%03d f=%03d ess=%s r=%03d score=%s%s %s %s" % (
      info.i_patterson_vector,
      info.i_fragment,
      info.extrapolation_scan_score,
      info.n_recycling_cycles,
      info.final_score,
      best_scores,
      matching,
      model_text,
      )
    # no newline yet: either dump_fragment or the bare print ends the line
    print >>out, summary ,
    if self.search_params.dump_all_models:
      self.dump_fragment(structure,info_text=info_text,
         prefix="dump",verbose=True,out=out)
    else:
      print >>out


  def compute_correlation(self, f_obs, structure):
    """
    Timed wrapper around correlation_calculation(); the elapsed time is
    accumulated in self.time_compute_correlation.
    """
    timer = self.time_compute_correlation
    timer.start()
    result = self.correlation_calculation( f_obs, structure )
    timer.stop()
    return result

  def correlation_calculation(self, f_obs, structure):
    """
    Return the linear correlation coefficient between the data of f_obs
    and the amplitudes |f_calc| computed from structure with
    self.f_calculator. Asserts that the correlation is well defined.
    """
    calculator = self.f_calculator(
      xray_structure=structure,
      miller_set=f_obs)
    calc_amplitudes = flex.abs(calculator.f_calc().data())
    result = flex.linear_correlation(f_obs.data(), calc_amplitudes)
    assert result.is_well_defined()
    return result.coefficient()

  def is_high_score(self, score):
    """
    Tell whether score ranks among the self.n_hesc best extrapolation
    scan scores seen so far.

    Returns True when n_hesc is not positive or when no more than n_hesc
    scores have been collected in self.extrapolation_scan_scores;
    otherwise True only if score is at least as good as the n_hesc-th
    best collected score.
    """
    # fix: the original called self.len(...), which does not exist
    if 0 < self.n_hesc < len( self.extrapolation_scan_scores ):
      ranked = sorted(
        self.extrapolation_scan_scores,
        key = lambda s: s.score(),
        reverse = True )
      return ranked[ self.n_hesc - 1 ] <= score

    return True

  def update_top_group(self):
    """
    Recompute self.top_group from the fully recycled structures.

    The structures whose score reaches self.scoring.top_group_threshold()
    are sorted by decreasing score, optionally reduced to one structure
    per Patterson vector
    (search_params.matches_must_not_share_patterson_vector) and truncated
    to search_params.n_top_models_to_compare. self.top_group is replaced,
    with did_emma reset to False, only when the selection changed. Does
    nothing when there is no fully recycled structure.
    """
    n_structures = len( self.structures )
    assert n_structures == len( self.final_scores )
    assert n_structures == len( self.fully_recycled_flags )

    candidates = []
    for ( structure, score, flag ) in zip(
        self.structures, self.final_scores, self.fully_recycled_flags ):
      if flag:
        candidates.append( ( structure, score ) )
    if not candidates:
      return

    min_score = self.scoring.top_group_threshold(
      scores = [ pair[1] for pair in candidates ]
      )
    ranked = [ pair for pair in candidates if min_score <= pair[1] ]
    ranked.sort( key = lambda pair: pair[1].score(), reverse = True )

    if self.search_params.matches_must_not_share_patterson_vector:
      # keep only the best structure of each Patterson vector
      best_structure, best_score = ranked[0]
      distinct = [ ( best_structure, best_score ) ]
      vectors_seen = [ best_structure.info.i_patterson_vector ]
      for candidate, candidate_score in ranked[1:]:
        i_vector = candidate.info.i_patterson_vector
        if i_vector not in vectors_seen:
          vectors_seen.append( i_vector )
          distinct.append( ( candidate, candidate_score ) )
      ranked = distinct

    ranked = ranked[ :self.search_params.n_top_models_to_compare ]

    assert ranked
    ( structures, scores ) = zip( *ranked )
    unchanged = (
      self.top_group is not None
      and self.top_group.structures == structures )
    if not unchanged:
      self.top_group = store(
        structures = structures, scores = scores, did_emma = False )

  def euclidean_model_matching(self,out=sys.stdout):
    """
    Match the structures of the current top group against each other.

    Sets self.top_matches, self.top_matches_with_sufficient_sites and
    self.top_matches_are_dummy_matches. Returns False when there is no
    top group, when the group has been matched before (did_emma) or when
    search_params.skip_consensus is set; True otherwise.
    """

    if (self.top_group is None or self.top_group.did_emma):
      return False

    self.top_group.did_emma = True
    self.time_euclidean_model_matching.start()
    assert self.top_group.structures
    # start with a "dummy" match of the best structure against itself so
    # that top_matches is always defined
    model1 = self.top_group.structures[0].as_emma_model()
    self.top_matches = emma.model_matches(
        model1 = model1,
        model2 = model1,
        tolerance = 0.99 * self.substructure_params.min_distance,
        models_are_diffraction_index_equivalent = True,
        break_if_match_with_no_singles=True
        )
    self.top_matches_are_dummy_matches=True
    matched_structures = [ self.top_group.structures[0] ]
    self.top_matches.structures = matched_structures[:]
    self.top_matches_with_sufficient_sites = self.top_matches
    if self.search_params.skip_consensus:
      self.time_euclidean_model_matching.stop()
      return False

    found_sufficient_sites=False
    # match each further structure against the consensus of the
    # structures matched so far
    for struct in self.top_group.structures[1:]:
      matched_structures.append( struct )
      self.top_matches = emma.model_matches(
        model1 = self.top_matches.consensus_model(),
        model2 = struct.as_emma_model(),
        tolerance = 0.99 * self.substructure_params.min_distance,
        models_are_diffraction_index_equivalent = True,
        break_if_match_with_no_singles=True
        )
      self.top_matches.structures = matched_structures[:]
      n_pairs_best_match = self.top_matches.n_pairs_best_match()
      if (n_pairs_best_match == 0):
        # nothing in common any more: stop extending the consensus
        break

      self.top_matches_are_dummy_matches=False
      print >>out, "Number of matching sites of top %d structures: %d" % (
        len( self.top_matches.structures ),
        n_pairs_best_match
        )
      out.flush()

      if (n_pairs_best_match >= self.search_params.min_top_n_matches):
        # remember the last matches that still had enough common sites
        self.top_matches_with_sufficient_sites = self.top_matches
        found_sufficient_sites=True
    if found_sufficient_sites:
      self.top_matches=self.top_matches_with_sufficient_sites

    self.time_euclidean_model_matching.stop()
    return True


  def show_top_matches(self, best_consensus_only, out=sys.stdout):
    """
    List the structures of self.top_matches on out, followed by the best
    refined match (if there is one with rms > 0) or a note that the
    structures have no sites in common.
    """
    matches = self.top_matches
    n_structures = len( matches.structures )

    print >>out
    if (best_consensus_only):
      heading = "Best consensus:"
    else:
      heading = "Top %d scores:" % n_structures
    print >>out, heading

    for struct in matches.structures:
      self.show_structure_info( struct.info ,structure=struct,out=out)

    have_match = (
      0 < matches.n_matches() and matches.refined_matches[0].rms > 0.0 )
    if have_match:
      matches.refined_matches[0].show(truncate_singles=5,f=out)
      return

    print >>out, "Structures with top %d scores have no sites in common." % len(
      matches.structures
      )
    print>>out


  def minimize_consensus_model(self, n_sites,out=sys.stdout):
    """
    Refine and score the consensus model of self.top_matches.

    n_sites: expected number of sites; the model is truncated to this
      number when model_needs_to_be_truncated() says so and the scoring
      type is not one of the phaser types.
    out: stream for the report.

    Sets self.consensus_score, self.consensus_correlation and
    self.consensus_model (the latter two to None when there are no
    matches at all).
    """
    if not self.top_matches or not self.top_matches.structures: # no results at all
      self.consensus_model = None
      self.consensus_correlation = None
      return

    n_pairs_best_match = self.top_matches.n_pairs_best_match()

    consensus_model = self.top_matches.consensus_model()
    consensus_model = (consensus_model
      .as_xray_structure( self.xray_scatterer )
      .change_basis( self.cb_op_niggli )
    )

    self.time_minimization.start()
    # keep a copy to fall back on if the refinement fails
    consensus_model_sav=consensus_model.customized_copy()
    try:
      score = self.refinement.minimize( structure = consensus_model )
    except Exception,e:
      print >>out, e
      print >>out,"Warning consensus model scoring failed ...carrying on. \n"
      # placeholder score marking the failure
      score=correlation_coefficient_score(-99999.9)
      consensus_model = consensus_model_sav

    self.consensus_score=score
    self.time_minimization.stop()
    count = len( consensus_model.scatterers() )
    if self.top_matches_are_dummy_matches:
      print >>out,"Score for top model",
    else:
      print >>out,"Score for consensus model",
    print >>out, "(%d site%s, before truncation): %s" % (
      count,
      "s" if count != 1 else "",
      score
      )
    if self.model_needs_to_be_truncated(
       model = consensus_model, max_sites = n_sites ) and \
       not self.scoring.scoring_type in ['phaser-complete','phaser-refine']:
      print >>out, "Truncating consensus model to expected number of sites."
      consensus_model = consensus_model[:n_sites]

    self.consensus_correlation = self.correlation_calculation(
      f_obs = self.structure_factors.q_all,
      structure = consensus_model
      )
    print >>out, "Correlation coefficient for consensus model (%d sites, after truncation): %.3f" % (
      len( consensus_model.scatterers() ),
      self.consensus_correlation
      )

    # store the model transformed with cb_op_niggli_inverse
    self.consensus_model = sort_by_occupancy_and_tidy(
      consensus_model.change_basis( self.cb_op_niggli_inverse )
      )
    print >>out

  # XXX for phenix gui tracking
  def get_consensus_cc_and_site_count (self) :
    """
    Return (consensus_correlation, number of sites in consensus_model);
    attributes that are missing or None yield (None, 0).
    """
    model = getattr(self, "consensus_model", None)
    if (model is None) :
      n_sites = 0
    else :
      n_sites = len(model.scatterers())
    return (getattr(self, "consensus_correlation", None), n_sites)

  def model_needs_to_be_truncated(self, model, max_sites):
    """
    True when truncation is enabled (search_params.truncate_consensus_model)
    and model has more than max_sites scatterers.
    """
    return (
      bool( self.search_params.truncate_consensus_model )
      and len( model.scatterers() ) > max_sites
      )


  def determine_early_termination(self,out=sys.stdout):
    """
    Decide whether the search can stop because the top structures agree.

    Returns False unless all of the following hold: the top matches are
    not dummy matches, early termination is enabled, the top group and the
    matched structures both contain n_top_models_to_compare structures,
    the best match has at least min_top_n_matches pairs, and the score
    criterion is met (self.scoring.terminate() for phaser scoring with
    cluster_termination, the module-level terminate() otherwise). When it
    returns True a message is printed on out.
    """
    if self.top_matches_are_dummy_matches:
      return False

    params = self.search_params
    if not params.enable_early_termination or self.top_group is None:
      return False

    n_required = params.n_top_models_to_compare
    if len( self.top_group.structures ) < n_required:
      return False

    matches = self.top_matches
    if matches.n_pairs_best_match() < params.min_top_n_matches:
      return False
    if len( matches.structures ) < n_required:
      return False

    # only fully recycled structures take part in the score comparison
    valid_scores = []
    for ( s, flag ) in zip( self.final_scores, self.fully_recycled_flags ):
      if flag:
        valid_scores.append( s )

    phaser_types = ['phaser-complete','phaser-refine']
    if params.cluster_termination and \
        self.scoring.scoring_type in phaser_types:
      # use phaser cluster termination
      criterion_met = self.scoring.terminate(
        top_scores = self.top_group.scores,
        valid_scores = valid_scores )
    else:
      # use terminate definition here in this file
      criterion_met = terminate(
        top_scores = self.top_group.scores,
        valid_scores = valid_scores,
        scoring_type = self.scoring.scoring_type,
        min_low_correlation = params.min_low_correlation,
        high_low_correlation_factor = params.high_low_correlation_factor,
        compare_sites_if_correlation_is_over =
          params.compare_sites_if_correlation_is_over )
    if not criterion_met:
      return False

    print >>out,"\nEarly termination based on "+\
      "%d matching structures with %d matches" %(
      len(matches.structures),matches.n_pairs_best_match())
    return True


def sort_by_occupancy_and_tidy(structure):
  """
  Return structure sorted by decreasing occupancy, with the scatterers
  relabelled in the new order via
  structure_from_clusters.make_scatterer_label().
  """
  tidied = structure.sort( by_value="occupancy", reverse = True )
  for ( position, scatterer ) in enumerate( tidied.scatterers() ):
    new_label = structure_from_clusters.make_scatterer_label( position )
    scatterer.label = new_label
  return tidied

def terminate(top_scores=None, valid_scores=None, scoring_type=None,
     min_low_correlation=None,high_low_correlation_factor=None,
      compare_sites_if_correlation_is_over=None):
  """
  Score-based termination criterion.

  The last entry of top_scores is compared with the lowest valid score
  (never taken lower than min_low_correlation): the criterion is met when
  the former is at least high_low_correlation_factor times the latter.
  For the phaser scoring types the score() values are used; otherwise the
  correlation() values, and a value of at least
  compare_sites_if_correlation_is_over is sufficient on its own.
  """
  last_top = top_scores[-1]
  lowest_valid = min( valid_scores, key = lambda s: s.score() )

  if scoring_type in ['phaser-complete','phaser-refine']:
    top_value = last_top.score()
    floor = max( min_low_correlation, lowest_valid.score() )
    return high_low_correlation_factor * floor <= top_value

  # use correlation
  top_value = last_top.correlation()
  floor = max( min_low_correlation, lowest_valid.correlation() )
  if compare_sites_if_correlation_is_over <= top_value:
    return True
  return high_low_correlation_factor * floor <= top_value

class search_structure_factors(object):
  """
  Holds the data used by the search: q_all, and q_large, the subset of
  q_all whose normalized value (quasi_normalized_as_normalized) exceeds
  e_min.
  """

  def __init__(self, q_all, e_min):

    normalized = q_all.quasi_normalized_as_normalized()
    self.q_all = q_all
    self.q_large = q_all.select( e_min < normalized.data() )


class correlation_coefficient_score(object):
  """
  Score object wrapping a single correlation coefficient.

  score() and correlation() return the same value. Multiplication by a
  number (from either side) gives a new score object; comparison goes
  through score().
  """

  def __init__(self, cc):

    self._cc = cc


  def correlation(self):
    """The wrapped correlation coefficient."""

    return self._cc


  def score(self):
    """The value used for ranking; identical to correlation()."""

    return self.correlation()


  def __mul__(self, other):

    scaled = self.correlation() * other
    return self.__class__( cc = scaled )


  def __rmul__(self, other):

    return self.__mul__( other = other )


  def __cmp__(self, other):

    mine, theirs = self.score(), other.score()
    return cmp( mine, theirs )


  def __str__(self):

    return "%.3f (cc)" % ( self.correlation(), )


class pc_substructure_refinement_and_scoring(object):
  """
  Abstract substructure refinement protocol

  Subclasses provide minimize(structure). This base class stores the
  search object, its search parameters, the iteration limit and the
  fraction of the best score that defines the top group
  (1 - search_params.max_relative_spread_top_correlations).
  """

  def __init__(self, hyss_search, max_iterations = 5):
    """
    hyss_search: search object; must have structure_factors and
      search_params.
    max_iterations: iteration limit made available to subclasses.

    Raises ValueError when hyss_search has no structure factors.
    """

    if not hyss_search.structure_factors:
      # call form of raise: the "raise E, msg" statement form is
      # deprecated and not valid in Python 3
      raise ValueError("Missing data: search structure factors")

    self._hyss_search = hyss_search
    self._sp = self._hyss_search.search_params
    self._max_iterations = max_iterations
    self._threshold_fraction = 1.0 - self._sp.max_relative_spread_top_correlations

  def top_group_threshold(self, scores):
    """
    Return the lowest score that still belongs to the top group: the best
    of scores multiplied by the threshold fraction.
    """

    best_score = max( scores, key = lambda s: s.score() )
    return self._threshold_fraction * best_score


  def normalize_occupancies(self, structure):
    """
    Rescale the occupancies of structure in place so that the largest one
    becomes 1; nothing is changed when the largest occupancy is not
    positive.
    """

    scatterers = structure.scatterers()
    occupancies = scatterers.extract_occupancies()
    f = flex.max( occupancies )

    if (f > 0):
      occupancies /= f
      scatterers.set_occupancies( occupancies )


class no_refinement(pc_substructure_refinement_and_scoring):
  """
  Scoring without any refinement: the structure is left untouched and
  scored by its correlation with q_all
  """

  def minimize(self, structure):

    search = self._hyss_search
    cc = search.correlation_calculation(
      f_obs = search.structure_factors.q_all,
      structure = structure
      )
    return correlation_coefficient_score( cc = cc )


class occupancy_refinement(pc_substructure_refinement_and_scoring):
  """
  Patterson correlation occupancy refinement

  minimize() runs minimization.occupancy() against q_all, normalizes the
  occupancies of the structure and returns the resulting correlation as a
  score object.
  """

  def minimize(self, structure):

    q_all = self._hyss_search.structure_factors.q_all
    final_correlation = minimization.occupancy(
      q_all,
      structure,
      max_iterations = self._max_iterations
      )
    self.normalize_occupancies( structure = structure )
    return correlation_coefficient_score( cc = final_correlation )


class site_and_occupancy_refinement(pc_substructure_refinement_and_scoring):
  """
  Patterson correlation site and occupancy refinement

  minimize() runs minimization.site_and_occupancy() against q_all,
  normalizes the occupancies of the structure and returns the last of the
  reported correlations as a score object.
  """

  def minimize(self, structure):

    q_all = self._hyss_search.structure_factors.q_all
    correlations = minimization.site_and_occupancy(
      q_all,
      structure,
      max_iterations = self._max_iterations
      )
    self.normalize_occupancies( structure = structure )
    last_correlation = correlations[-1]
    return correlation_coefficient_score( cc = last_correlation )


class fast_nv1995_extrapolation(object):
  """
  Extrapolation scan based on the nv1995 fast translation function
  """

  def __init__(self, hyss_search,out=sys.stdout):
    """
    Set up the extrapolation_scan.fast_nv1995 scanner from the data held by
    hyss_search.

    out is accepted but not used by this constructor.
    """

    search = hyss_search
    self._hyss_search = search
    self._scanner = extrapolation_scan.fast_nv1995(
      f_obs = search.structure_factors.q_all,
      xray_scatterer = search.xray_scatterer,
      crystal_gridding_tags = search.crystal_gridding_tags,
      peak_search_parameters = search.peak_search_parameters
      )


  def get_extrapolation_structure_and_score(self, fragment, n_sites):
    """
    Extend fragment to a structure and score it.

    The scanner is run on fragment, a structure is built from the resulting
    cluster analysis for n_sites, and its correlation with the q_all
    structure factors is calculated.

    Returns a tuple (structure, correlation_coefficient_score).
    """

    search = self._hyss_search
    clusters = self._scanner( fragment = fragment )

    extended = structure_from_clusters.build(
      cluster_analysis = clusters,
      xray_scatterer = search.xray_scatterer,
      n_sites = n_sites
      )

    cc = search.correlation_calculation(
      f_obs = search.structure_factors.q_all,
      structure = extended
      )

    score = correlation_coefficient_score( cc = cc )
    return ( extended, score )


def get_scoring_object(hyss_search, rescore, llgc_sigma = None, out=sys.stdout):
  """
  Build the scoring object for the requested rescoring protocol.

  hyss_search: search object handed to the scoring class.
  rescore: "correlation", "phaser-refine" or "phaser-complete".
  llgc_sigma: forwarded only for "phaser-complete".
  out: output stream forwarded to the phaser scoring classes; not used for
    "correlation".

  The returned object carries a scoring_type attribute set to the protocol
  name.

  Raises Sorry for an unknown protocol, when the phaser adaptor cannot be
  imported, or when a phaser protocol is requested with fewer reflections
  than search_params.minimum_reflections_for_phaser.
  """

  # require minimum number of reflections for phaser rescoring 2014-02-02
  if rescore in ['phaser-refine','phaser-complete']:
    n_reflections = hyss_search.f_original.data().size()

    if n_reflections < hyss_search.search_params.minimum_reflections_for_phaser:
      raise Sorry(
        "Too few reflections for phaser rescoring (%d)" % n_reflections
        + "\nUse minimum_reflections_for_phaser to change this"
        )

  if rescore == "correlation":
    if hyss_search.search_params.occupancy_minimization:
      scoring = occupancy_refinement( hyss_search = hyss_search)

    else:
      scoring = no_refinement( hyss_search = hyss_search )

    scoring.scoring_type='correlation'

  elif rescore == "phaser-refine":
    try:
      from phaser.phenix_adaptors import hyss_scoring

    except ImportError as e:
      # Report a missing phaser installation as a user-level error
      raise Sorry(e)

    scoring = hyss_scoring.phaser_composite_rescoring(
      hyss_search = hyss_search,
      complete = False, out=out,
      )
    scoring.scoring_type='phaser-refine'

  elif rescore == "phaser-complete":
    try:
      from phaser.phenix_adaptors import hyss_scoring

    except ImportError as e:
      # Report a missing phaser installation as a user-level error
      raise Sorry(e)

    scoring = hyss_scoring.phaser_composite_rescoring(
      hyss_search = hyss_search,
      llgc_sigma = llgc_sigma,
      complete = True, out=out,
      )
    scoring.scoring_type='phaser-complete'

  else:
    raise Sorry("Unknown scoring protocol: %s" % rescore)

  return scoring

def get_refinement_object(hyss_search, rescore, out=sys.stdout):
  """
  Build the refinement object matching the rescoring protocol.

  hyss_search: search object handed to the refinement class.
  rescore: "correlation" selects site_and_occupancy_refinement;
    "phaser-refine" and "phaser-complete" both select
    hyss_scoring.phaser_composite_rescoring with complete = False.
  out: output stream forwarded to the phaser class; not used for
    "correlation".

  Raises Sorry for an unknown protocol or when the phaser adaptor cannot be
  imported.
  """

  if rescore == "correlation":
    refinement = site_and_occupancy_refinement( hyss_search = hyss_search )

  elif rescore in ( "phaser-refine", "phaser-complete" ):
    try:
      from phaser.phenix_adaptors import hyss_scoring

    except ImportError as e:
      # Report a missing phaser installation as a user-level error
      raise Sorry(e)

    refinement = hyss_scoring.phaser_composite_rescoring(
      hyss_search = hyss_search,
      complete = False,
      out = out,
      )

  else:
    raise Sorry("Unknown refinement protocol: %s" % rescore)

  return refinement


def get_extrapolation_object(hyss_search, extrapolation,out=sys.stdout):
  """
  Build the extrapolation scanner for the requested protocol.

  hyss_search: search object handed to the scanner class.
  extrapolation: "fast_nv1995", "phaser" or "phaser-map".
  out: output stream; forwarded for "fast_nv1995" only.

  Raises Sorry for an unknown protocol or when the phaser adaptor cannot be
  imported.
  """

  if extrapolation == "fast_nv1995":
    scanner = fast_nv1995_extrapolation( hyss_search = hyss_search ,out=out)

  elif extrapolation == "phaser":
    try:
      from phaser.phenix_adaptors import hyss_scoring

    except ImportError as e:
      # Report a missing phaser installation as a user-level error
      raise Sorry(e)

    scanner = hyss_scoring.phaser_extrapolation(
      hyss_search = hyss_search
      )

  elif extrapolation == "phaser-map":
    try:
      from phaser.phenix_adaptors import hyss_scoring

    except ImportError as e:
      # Report a missing phaser installation as a user-level error
      raise Sorry(e)

    scanner = hyss_scoring.phaser_map_based_extrapolation(
      hyss_search = hyss_search
      )

  else:
    # The message names the extrapolation protocol (it used to say
    # "refinement", copied from get_refinement_object)
    raise Sorry("Unknown extrapolation protocol: %s" % extrapolation)

  return scanner
