-í
$Añ=c       s™    d  Z    d k l Z  d k Z 	 d k Z 
 d k Z  d k Z  d k Z  d k Z  d „  Z	 ) d d d „ Z
 N d „  Z U d „  Z \ e d	 e  g  d
 g ƒ Z ] e i ƒ  _ e i d
 ƒ Z ` e o ` e e ƒ a n
 a d a d h  Z e d Z f xl e i Df ]^ Z g d Ge GHh e e ƒ Z i e i e ƒ Z j e i t e e ƒ \ Z Z k e e 7Z qWl d e GHn e i d ƒ i ƒ  Z p d GHr xår d oÚs e d ƒ Z  t e  d	 j o u d GHn¬| e  d j o= } d i! g  i" Z# e$ D} ] Z% e# e% d ƒ q[# ƒ GHn_~ e  d j o  e i e$ ƒ n;€ e  d j oè  xÚ e& e' e( e$ ƒ ƒ e$ ƒ D ]º \ Z \ Z) Z* ‚ d e e) f GHƒ g  i" Z# e* Dƒ ] Z+ e# e+ d ƒ q½[# Z, „ e e, ƒ Z- … e- Z. † d e. i/ ƒ  i0 d d ƒ d Z. ‡ e. i0 d e) d e e) f ƒ Z. ˆ e. Gq…W‰ HnCŠ e  d j o¬‹ g  i" Z# e$ D‹ ] \ Z) Z* e# e) ƒ qo[# Z1  d Z2 Ž xB e' d t ƒ DŽ ]. Z  e$ e Z3  e3 \ Z4 Z* ‘ d e4 GHq­W’ xe' t e( e$ ƒ ƒ D’ ]Z ” e$ e d Z5 • e5 \ Z4 Z* – g  i" Z# e* D– ] Z+ e# e+ d ƒ q5[# Z6 ˜ e$ e Z3 ™ e3 \ Z4 Z* š g  i" Z# e* Dš ] Z+ e# e+ d ƒ q‚[# Z7 œ xZ e7 Dœ ]. Z8  e8 d e6 j o ž d  e4 GHŸ Pn q¬W¡ d! e4 e1 e t d e !f GHqûWn‡ £ e  d" j oh ¤ d# d$ GH¥ e9 e i: e t ƒ ƒ d%  Z$ ¦ d i! g  i" Z# e$ D¦ ] Z% e# e% d ƒ q\[# ƒ GHn © e; e  ƒ GHq°Wd S(&   s-   Usage

-n num      n-gram length (default 3)
(   s   command_lineNc    s     d }  xþ t t t |  ƒ ƒ D ]ä }  |  | d }	  |	 \ } }
  g  i	 } |
 D ] } | | d ƒ qb ~ }  |  | }  | \ } }
  g  i	 } |
 D ] } | | d ƒ q¯ ~ }   x: | D  ]" } ! | d | j o " Pn qÙ W$ | d 7} q( W& | Sd S(   so   Counts the number of continuity breaks in the given sentence.

    sent: list of (word, its position list)
    i    i   i   N(   s   breakss   ranges   lengths   lens   sents   is   end_of_prev_ngrams   words   poslss   appends   _[1]s   poss   prev_absolute_wordpositionss   end_of_ngrams   cur_absolute_wordpositionss   cur_absolute_wordpos(   s   sents   prev_absolute_wordpositionss   words   cur_absolute_wordposs   is   cur_absolute_wordpositionss   breakss   poss   _[1]s   end_of_prev_ngrams   poslss   end_of_ngram(    (    s   main.pys   num_cbreaks s$   	 	    
 	f0.050000000000000003i   c    s†  ) / 0 1 d } 2 d } 3 d } 4 d } 5 xÛ t |  ƒ D5 ]Ê }	 6 | oK 7 d |	 |  | | j o+ 8 d d |	 |  |	 f GH9 | | 7} n n : t	 t
 i t t ƒ ƒ d  } ; t | ƒ } < | d j o < | d 7} n = | | 7} > | t | ƒ 7} q= W? | d |  } @ | d |  } A | d | } B | d |  } C d GHD t ƒ  }
 |
 i t ƒ  ƒ E d	 |
 GHd
 S(   sì   Generates a lot of sentences, and displays statistical info
    
    num_sents: number of sentences to run the analysis on
    benchmarkincr: for progress indicator
    status: boolean, whether or not to show the progress indicator
    i    f0.0f1.0s    %d%% done, %d sentences analyzedf100.0iÿÿÿÿi   s3   ------------------- Results -----------------------s  
length=%(length)s
num_sents=%(num_sents)s
perc_total_nobreaks=%(perc_total_nobreaks)s #Straight-copied sentences; indicator of sparseness
avg_words_per_sent=%(avg_words_per_sent)s
avg_breaks_per_sent=%(avg_breaks_per_sent)s
breaks_per_word=%(breaks_per_word)s
N(   s   total_breakss   total_wordss   total_nobreakss   lastbenchmarks   xranges	   num_sentss   is   statuss   benchmarkincrs   lists   rsgs   random_sentences   datas   lengths   sents   num_cbreakss
   num_breakss   lens   avg_words_per_sents   avg_breaks_per_sents   breaks_per_words   perc_total_nobreakss   localss   allvarss   updates   globals(   s	   num_sentss   benchmarkincrs   statuss   total_wordss   lastbenchmarks   perc_total_nobreakss   total_nobreakss   breaks_per_words   avg_words_per_sents   is   allvarss   total_breakss   avg_breaks_per_sents
   num_breakss   sent(    (    s   main.pys   do_stats) s2   				 	
 c    sa   N O P |  } Q t i d | ƒ i ƒ  i d d ƒ } R | i |  d t |  f ƒ } S | Sd S(   sE   hard-coded to work with the ICAME-Brown1 corpus on the leland systemssP   fgrep --no-filename -C 10 '%s' /afs/ir/data/linguistic-data/Brown/ICAME-Brown1/*s   
s    
s   %s[31m%s[0mN(   s   linetags   fgrepable_linetagss   oss   popens   reads   replaces   ss   ANSIBOLD(   s   linetags   ss   fgrepable_linetags(    (    s   main.pys   showN s
   	(c    sB   U V W d i  |  ƒ } X t i d | ƒ i ƒ  i d d ƒ Sd S(   sE   hard-coded to work with the ICAME-Brown1 corpus on the leland systemss   
sJ   fgrep --no-filename '%s' /afs/ir/data/linguistic-data/Brown/ICAME-Brown1/*s    
N(   s   joins   linetagss   fgrepable_linetagss   oss   popens   reads   replace(   s   linetagss   fgrepable_linetags(    (    s   main.pys   clinesU s   s   hs   ni   i    s   Processing files   %s total words processeds	   tput bolds   h for help.  Control-D to exits   ? sg   
[Enter]=new sentence;  
s=show sentence;
a=all positions; 
c=context positions; 
b=continuity analysiss   ss    s   as   cs   %s) %s s   	s   
s   
	s    %s s   %s[31m %s [0ms   bs
   %-25s: n/ai   s   %-25s: continuous..s.   %-25s: Continuity break over the n-1-gram: %s s    s   -i<   iÿÿÿÿ(<   s   __doc__s   cmd_lines   command_lines   syss   oss   rsgs   confs   inputclassess   pprints   num_cbreakss   do_statss   shows   cliness   flagss   reads   switchs   strlens   ints   lengths   datas	   wordcounts	   FILENAMESs   is   opens   infiles
   INPUTCLASSs   gens
   build_dicts	   wordsreads   popens   ANSIBOLDs	   raw_inputs   opts   joins   appends   _[1]s   sents   itms   zips   ranges   lens   ws   poslss   poss   linetagss	   ctxtliness   ss   strips   replaces   wordss   breakss   end_of_ngrams   words   end_of_prev_ngrams   prev_absolute_wordpositionss   cur_absolute_wordpositionss   cur_absolute_wordposs   lists   random_sentences   eval(&   s   itms   cur_absolute_wordposs   shows   linetagss   poss   confs   do_statss   poslss   strlens	   wordsreads   pprints   command_lines   ss   end_of_prev_ngrams   end_of_ngrams   sents   opts   inputclassess   breakss   rsgs	   wordcounts   syss   prev_absolute_wordpositionss   wordss   datas   gens   ANSIBOLDs	   ctxtliness   words   cliness   is   num_cbreakss   infiles   _[1]s   flagss   ws   cur_absolute_wordpositionss   os(    (    s   main.pys   ? s¤   %
 			 	 
 &   	## "	 	 	    
 	) &