import sys
import os
import rsg
import conf
import inputclasses
import pprint

def num_cbreaks(sent, ngram_length=None):
    """Count the continuity breaks in a sentence.

    Each item of ``sent`` is a ``(word, positions)`` pair, and element
    ``[2]`` of every entry in ``positions`` is read as a numeric position
    (the original code calls it the "absolute word position").  Item ``i``
    continues item ``i - 1`` when at least one of its positions is exactly
    one greater than a position of item ``i - 1``; otherwise a continuity
    break is counted.  An item with no positions at all therefore always
    counts as a break.

    Args:
        sent: Sequence of ``(word, positions)`` pairs.
        ngram_length: Index of the first item to check.  When omitted, the
            module-level ``length`` is used, as before.  Presumably this is
            the n-gram size, so items before it belong to the first n-gram
            -- TODO confirm against the caller.

    Returns:
        The number of checked items that do not continue their predecessor.
    """
    if ngram_length is None:
        # Backward-compatible default: fall back to the module global that
        # this function historically read implicitly.
        ngram_length = length

    # Item 0 has no predecessor; starting below 1 would make sent[i - 1]
    # wrap around to the end of the sentence and report a bogus break.
    first_checked = max(ngram_length, 1)

    breaks = 0
    for i in range(first_checked, len(sent)):
        _, prev_posls = sent[i - 1]
        _, cur_posls = sent[i]
        prev_positions = set(pos[2] for pos in prev_posls)
        continues = any(pos[2] - 1 in prev_positions for pos in cur_posls)
        if not continues:
            breaks += 1
    return breaks
    

def do_stats(num_sents, benchmarkincr=.05, status=1):
    """Generate random sentences and print continuity-break statistics.

    Reads two module-level names that are not set in this function:
    ``data`` (passed to ``rsg.random_sentence``) and ``length`` (passed to
    ``rsg.random_sentence`` and echoed in the report).

    Args:
        num_sents: Number of sentences to generate and analyze.
        benchmarkincr: Fraction of ``num_sents`` between progress messages.
        status: When true, progress messages are printed while running.

    Returns:
        None.  The results are printed to standard output.
    """
    def ratio(numerator, denominator):
        # Float division that yields 0.0 rather than raising when nothing
        # was generated (num_sents == 0) or every sentence was empty.
        if denominator == 0:
            return 0.0
        return numerator * 1.0 / denominator

    total_breaks = 0
    total_words = 0
    total_nobreaks = 0
    lastbenchmark = 0.0
    for i in xrange(num_sents):
        if status and 1.0 * i / num_sents > lastbenchmark + benchmarkincr:
            print("%d%% done, %d sentences analyzed"
                  % (100.0 * i / num_sents, i))
            lastbenchmark += benchmarkincr
        # The last item produced by the generator is dropped before
        # analysis (presumably an end-of-sentence marker -- TODO confirm).
        sent = list(rsg.random_sentence(data, length))[:-1]
        num_breaks = num_cbreaks(sent)
        if num_breaks == 0:
            total_nobreaks += 1
        total_breaks += num_breaks
        total_words += len(sent)

    # Build the report mapping explicitly instead of merging locals() with
    # globals(): that merge let any module-level name shadow a value
    # computed here.
    report = {
        "length": length,
        "num_sents": num_sents,
        "perc_total_nobreaks": ratio(total_nobreaks, num_sents),
        "avg_words_per_sent": ratio(total_words, num_sents),
        "avg_breaks_per_sent": ratio(total_breaks, num_sents),
        "breaks_per_word": ratio(total_breaks, total_words),
    }
    print("------------------- Results -----------------------")
    print("""
length=%(length)s
num_sents=%(num_sents)s
perc_total_nobreaks=%(perc_total_nobreaks)s #Straight-copied sentences; indicator of sparseness
avg_words_per_sent=%(avg_words_per_sent)s
avg_breaks_per_sent=%(avg_breaks_per_sent)s
breaks_per_word=%(breaks_per_word)s
""" % report)

