o
    g,                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m	Z	 ddl
mZmZmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlZddlmZ dd	l m!Z!m"Z"m#Z# dd
l$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ dZ,e-dZ.ede/Z0ede/Z1G dd deZ2edee0 f Z3ee0df Z4eeeef Z5G dd dZ6ede6f Z7G dd dZ8G dd dZ9G dd deZ:ede1e1f Z;e,fddde/dee3e4f fd d!Z<d"e7ddfd#d$Z=d%dd&e0d'dd(e0d)e/de/fd*d+Z>G d,d- d-eZ?G d.d/ d/Z@d0eeA d1eBd2eBd3eBd4eBdee2 fd5d6ZCejDG d7d dZEd8d9iZFd:e)d;e)fd<d=ZGG d>d? d?e!e@e"ZHd@dA ZIdGdBdCZJdHdDdEZKeLdFkrbeK  dS dS )Ia  a similarities / code duplication command line tool and pylint checker

The algorithm is based on comparing the hash value of n successive lines of a file.
First the files are read and any line that doesn't fullfill requirement are removed (comments, docstrings...)
Those stripped lines are stored in the LineSet class which gives access to them.
Then each index of the stripped lines collection is associated with the hash of n successive entries of the stripped lines starting at the current index
(n is the minimum common lines option).
The common hashes between both linesets are then looked for. If there are matches, then the match indices in both linesets are stored and associated
with the corresponding couples (start line number/end line number) in both files.
This association is then postprocessed to handle the case of successive matches. For example if the minimum common lines setting is set to four, then
the hashes are computed with four lines. If one of match indices couple (12, 34) is the successor of another one (11, 33) then it means that there are
in fact five lines wich are common.
Once postprocessed the values of association table are the result looked for, i.e start and end lines numbers of common lines in both files.
    N)defaultdict)getopt)BufferedIOBaseBufferedReaderBytesIO)chaingroupby)AnyDict	FrozenSet	GeneratorIterableList
NamedTupleNewTypeOptionalSetTextIOTupleUnion)nodes)BaseCheckerMapReduceMixintable_lines_from_stats)IRawChecker)Table)CheckerStats)decoding_stream   z.*\w+Index
LineNumberc                   @   s   e Zd ZU eed< eed< dS )LineSpecifsline_numbertextN)__name__
__module____qualname__r    __annotations__str r)   r)   U/home/ubuntu/cloudmapper/venv/lib/python3.10/site-packages/pylint/checkers/similar.pyr!   [   s   
 r!   
LinesChunkSuccessiveLinesLimitsc                   @   s.   e Zd ZdZdZdddddeddfd	d
ZdS )CplSuccessiveLinesLimitsz
    This class holds a couple of SuccessiveLinesLimits objects, one for each file compared,
    and a counter on the number of common lines between both stripped lines collections extracted
    from both files
    
first_filesecond_fileeffective_cmn_lines_nbr/   r,   r0   r1   returnNc                 C   s   || _ || _|| _d S Nr.   )selfr/   r0   r1   r)   r)   r*   __init__t   s   
z!CplSuccessiveLinesLimits.__init__)r$   r%   r&   __doc__	__slots__intr5   r)   r)   r)   r*   r-   k   s    r-   LineSetStartCouplec                   @   sn   e Zd ZdZdZdededee ddfdd	Zd
e	de
fddZdefddZdefddZdefddZdS )r+   zm
    The LinesChunk object computes and stores the hash of some consecutive stripped lines of a lineset.
    _fileid_index_hashfileidnum_linelinesr2   Nc                 G   s,   || _ 	 t|| _	 tdd |D | _d S )Nc                 s       | ]}t |V  qd S r3   )hash).0linr)   r)   r*   	<genexpr>       z&LinesChunk.__init__.<locals>.<genexpr>)r;   r   r<   sumr=   )r4   r>   r?   r@   r)   r)   r*   r5      s   
zLinesChunk.__init__oc                 C   s   t |tstS | j|jkS r3   )
isinstancer+   NotImplementedr=   )r4   rH   r)   r)   r*   __eq__      
zLinesChunk.__eq__c                 C      | j S r3   )r=   r4   r)   r)   r*   __hash__   s   zLinesChunk.__hash__c                 C   s   d| j  d| j d| j dS )Nz<LinesChunk object for file z (z, z)>r:   rN   r)   r)   r*   __repr__   s   zLinesChunk.__repr__c                 C   s   d| j  d| j d| j S )NzLinesChunk object for file z, starting at line z
 
Hash is r:   rN   r)   r)   r*   __str__   s   zLinesChunk.__str__)r$   r%   r&   r6   r7   r(   r8   r   r5   r	   boolrK   rO   rP   rQ   r)   r)   r)   r*   r+      s    
c                   @   st   e Zd ZdZdZdededdfddZedefd	d
ZedefddZ	e	j
deddfddZ	defddZdS )r,   z
    A class to handle the numbering of begin and end of successive lines.

    :note: Only the end line number can be updated.
    _start_endstartendr2   Nc                 C   s   || _ || _d S r3   rS   )r4   rV   rW   r)   r)   r*   r5      s   
zSuccessiveLinesLimits.__init__c                 C   rM   r3   )rT   rN   r)   r)   r*   rV         zSuccessiveLinesLimits.startc                 C   rM   r3   rU   rN   r)   r)   r*   rW      rX   zSuccessiveLinesLimits.endvaluec                 C   s
   || _ d S r3   rY   r4   rZ   r)   r)   r*   rW         
c                 C      d| j  d| j dS )Nz<SuccessiveLinesLimits <;>>rS   rN   r)   r)   r*   rP      s   zSuccessiveLinesLimits.__repr__)r$   r%   r&   r6   r7   r    r5   propertyrV   rW   setterr(   rP   r)   r)   r)   r*   r,      s    c                   @   s^   e Zd ZU dZeed< eed< defddZdefddZ	de
fd	d
Zdedd fddZdS )r9   zN
    Indices in both linesets that mark the beginning of successive lines
    fst_lineset_indexsnd_lineset_indexr2   c                 C   r]   )Nz<LineSetStartCouple <r^   r_   )rb   rc   rN   r)   r)   r*   rP      s   zLineSetStartCouple.__repr__c                 C   s&   t |tstS | j|jko| j|jkS r3   )rI   r9   rJ   rb   rc   r4   otherr)   r)   r*   rK      s
   

zLineSetStartCouple.__eq__c                 C   s   t | jt | j S r3   )rB   rb   rc   rN   r)   r)   r*   rO      s   zLineSetStartCouple.__hash__rZ   c                 C   s   t t| j| t| j| S r3   )r9   r   rb   rc   r[   r)   r)   r*   	increment   s   zLineSetStartCouple.incrementN)r$   r%   r&   r6   r   r'   r(   rP   rR   rK   r8   rO   rf   r)   r)   r)   r*   r9      s   
 LineSetlinesetmin_common_linesr2   c              	      s   t t}i }tdd | jD   fddt|D }tt| D ]G^}}| j| j}z
| j||  j}W n tyF   | jd jd }Y nw t	|}	t
t|t|d||	< t| j|	g|R  }
||
 |	 q!||fS )a`  
    Return two dicts. The first associates the hash of successive stripped lines of a lineset
    to the indices of the starting lines.
    The second dict, associates the index of the starting line in the lineset's stripped lines to the
    couple [start, end] lines number in the corresponding file.

    :param lineset: lineset object (i.e the lines in a file)
    :param min_common_lines: number of successive lines that are used to compute the hash
    :return: a dict linking hashes to corresponding start index and a dict that links this
             index to the start and end lines in the file
    c                 s   s    | ]}|j V  qd S r3   )r#   )rC   xr)   r)   r*   rE      s    zhash_lineset.<locals>.<genexpr>c                    s   g | ]
}t  |d  qS r3   )iter)rC   ir@   r)   r*   
<listcomp>   s    z hash_lineset.<locals>.<listcomp>   )rV   rW   )r   listtuplestripped_linesrange	enumeratezipr"   
IndexErrorr   r,   r    r+   nameappend)rh   ri   
hash2indexindex2linesshifted_linesindex_i
succ_linesstart_linenumberend_linenumberindexl_cr)   rm   r*   hash_lineset   s,   
r   all_couplesc              
   C   s   t |  D ]Q}g }|td}|| v rB| | jj| | j_| | jj| | j_| |  jd7  _|| |td}|| v s|D ]}z| 	| W qD t
yV   Y qDw qdS )a"  
    Removes all successive entries in the dictionary in argument

    :param all_couples: collection that has to be cleaned up from successives entries.
                        The keys are couples of indices that mark the beginning of common entries
                        in both linesets. The values have two parts. The first one is the couple
                        of starting and ending line numbers of common successives lines in the first file.
                        The second part is the same for the second file.

    For example consider the following dict:

    >>> all_couples
    {(11, 34): ([5, 9], [27, 31]),
     (23, 79): ([15, 19], [45, 49]),
     (12, 35): ([6, 10], [28, 32])}

    There are two successives keys (11, 34) and (12, 35).
    It means there are two consecutive similar chunks of lines in both files.
    Thus remove last entry and update the last line numbers in the first entry

    >>> remove_successives(all_couples)
    >>> all_couples
    {(11, 34): ([5, 10], [27, 32]),
     (23, 79): ([15, 19], [45, 49])}
    rp   N)rr   keysrf   r   r/   rW   r0   r1   ry   popKeyError)r   couple	to_removetesttargetr)   r)   r*   remove_successives  s$   
r   ls_1	stindex_1ls_2	stindex_2common_lines_nbc                 C   sP   dd | j |||  D }dd |j |||  D }tdd t||D S )az  
    Return the effective number of common lines between lineset1 and lineset2 filtered from non code lines, that is to say the number of
    common successive stripped lines except those that do not contain code (for example a ligne with only an
    ending parathensis)

    :param ls_1: first lineset
    :param stindex_1: first lineset starting index
    :param ls_2: second lineset
    :param stindex_2: second lineset starting index
    :param common_lines_nb: number of common successive stripped lines before being filtered from non code lines
    :return: the number of common successives stripped lines that contain code
    c                 S      g | ]}t |jr|jqS r)   REGEX_FOR_LINES_WITH_CONTENTmatchr#   rC   lspecifr)   r)   r*   rn   Q      
z(filter_noncode_lines.<locals>.<listcomp>c                 S   r   r)   r   r   r)   r)   r*   rn   V  r   c                 s   s    | ]	\}}||kV  qd S r3   r)   )rC   sline_1sline_2r)   r)   r*   rE   [  s    z'filter_noncode_lines.<locals>.<genexpr>)rs   rG   rv   )r   r   r   r   r   stripped_l1stripped_l2r)   r)   r*   filter_noncode_lines>  s   r   c                   @   sF   e Zd ZU eed< ded< eed< eed< ded< eed< eed< d	S )
Commonalitycmn_lines_nbrg   fst_lsetfst_file_startfst_file_endsnd_lsetsnd_file_startsnd_file_endN)r$   r%   r&   r8   r'   r    r)   r)   r)   r*   r   ^  s   
 r   c                   @   s  e Zd ZdZeddddfdedededededd	fd
dZ		d%dede	de
e dd	fddZd&ddZdeeeee f  fddZdeeeee f  dd	fddZdeeeee f  defddZdddddeed	d	f fddZdeed	d	f fdd Zd!d" Zd#d$ Zd	S )'Similarz,finds copy-pasted lines of code in a projectF	min_linesignore_commentsignore_docstringsignore_importsignore_signaturesr2   Nc                 C   s(   || _ || _|| _|| _|| _g | _d S r3   )r   r   r   r   r   linesets)r4   r   r   r   r   r   r)   r)   r*   r5   k  s   
zSimilar.__init__streamidstreamencodingc              	   C   sj   t |tr|du rtt||j}n|j}z| jt|| | j| j	| j
| j W dS  ty4   Y dS w )z(append a file to search for similaritiesN)rI   r   
ValueErrorr   	readlinesr   ry   rg   r   r   r   r   UnicodeDecodeError)r4   r   r   r   r   r)   r)   r*   append_streamz  s&   


zSimilar.append_streamc                 C   s    | j dkrdS | |   dS )z<start looking for similarities and display results on stdoutr   N)r   _display_sims_compute_simsrN   r)   r)   r*   run  s   
zSimilar.runc                 C   s   t t}|  D ]=}|j}|j}|j}|j}|j}|j}|j	}	|| }
|
D ]}|||f|v s5|||	f|v r7 nq%|

|||f|||	fh qg }| D ]\}}|D ]	}|
||f qRqL|  |  |S )z&compute similarities in appended files)r   rq   
_iter_simsr   r   r   r   r   r   r   ry   itemssortreverse)r4   no_duplicatescommonalitynumlineset1start_line_1
end_line_1lineset2start_line_2
end_line_2	duplicatecouplessims	ensemblescplsr)   r)   r*   r     sD   zSimilar._compute_simssimilaritiesc                 C   s   |  |}t| dS )z'Display computed similarities on stdoutN)_get_similarity_reportprint)r4   r   reportr)   r)   r*   r     s   
zSimilar._display_simsc              
   C   s   d}d}|D ]X\}}|d| dt | d7 }t|}d } }}	|D ]\}}}	|d|j d| d	|	 d
7 }q#|rT|j||	 D ]}
||
 rPd|
  dnd7 }qA||t |d  7 }qtdd | jD }|d| d| d|d | dd7 }|S )z!Create a report from similarities r   
z similar lines in z files
N==:[:z]
z   rp   c                 s   rA   r3   lenrC   rh   r)   r)   r*   rE     rF   z1Similar._get_similarity_report.<locals>.<genexpr>zTOTAL lines=z duplicates=z	 percent=      Y@z.2f)r   sortedrx   _real_linesrstriprG   r   )r4   r   r   duplicated_line_numbernumberr   	couples_lline_set
start_lineend_linelinetotal_line_numberr)   r)   r*   r     s    "&zSimilar._get_similarity_reportr   rg   r   c              
   #   s<   t || j\ }t || j\}}t  }t| }t||@  fddd}i }	t|tddD ].}
t |
 ||
 D ]!}|d }|d }t	t

|| t

|| | jd|	t||< q@q4t|	 |	 D ]0\}}|j}|j}|j}t|||jj|jj||jj|jjd}t|||||}|| jkr|V  qkd	S )
al  
        Find similarities in the two given linesets.

        This the core of the algorithm.
        The idea is to compute the hashes of a minimal number of successive lines of each lineset and then compare the hashes.
        Every match of such comparison is stored in a dict that links the couple of starting indices in both linesets to
        the couple of corresponding starting and ending lines in both files.
        Last regroups all successive couples in a bigger one. It allows to take into account common chunk of lines that have more
        than the minimal number of successive lines required.
        c                    s    |  d S Nr   r)   )mhash_to_index_1r)   r*   <lambda>  s    z&Similar._find_common.<locals>.<lambda>keyr<   r   rp   )r1   )r   r   r   r   r   r   r   N)r   r   	frozensetr   r   operator
attrgetter	itertoolsproductr-   copyr9   r   r   rb   rc   r1   r   r/   rV   rW   r0   r   )r4   r   r   index_to_lines_1hash_to_index_2index_to_lines_2hash_1hash_2common_hashesr   c_hashindices_in_linesetsindex_1index_2cml_stripped_lcmn_lstart_index_1start_index_2nb_common_linescom
eff_cmn_nbr)   r   r*   _find_common  sZ   


zSimilar._find_commonc                 c   sL    t | jdd D ]\}}| j|d d D ]}| ||E dH  qq
dS )zWiterate on similarities among all files, by making a cartesian
        product
        Nro   rp   )ru   r   r   )r4   idxrh   r   r)   r)   r*   r     s   zSimilar._iter_simsc                 C   rM   )zReturns the data we can use for a map/reduce process

        In this case we are returning this instance's Linesets, that is all file
        information that will later be used for vectorisation.
        r   rN   r)   r)   r*   get_map_data'  s   zSimilar.get_map_datac                 C   s   dd |D | _ dS )oReduces and recombines data into a format that we can report on

        The partner function of get_map_data()c                 S   s   g | ]	}|D ]}|qqS r)   r)   )rC   rh   r   r)   r)   r*   rn   3  s    z2Similar.combine_mapreduce_data.<locals>.<listcomp>Nr  )r4   linesets_collectionr)   r)   r*   combine_mapreduce_data/  s   zSimilar.combine_mapreduce_datar3   r2   N)r$   r%   r&   r6   DEFAULT_MIN_SIMILARITY_LINEr8   rR   r5   r(   STREAM_TYPESr   r   r   r   r   r   LinesChunkLimits_Tr   r   r   r   r   r   r   r  r  r)   r)   r)   r*   r   h  sh    


'


Er   r@   r   r   r   r   c                    s  |s|rt d| }|r%dd |jD }dd t|dd dD }d	}|rKd
ttj dtjdttj f fdd  g |}	tt	dd |	D  }
g }d}t
| ddD ]s\}}| }|r|s|dsk|drx|dd }|dd }n|ds|dr|dd }|dd }|r||rd}d}|r|||}|rd}|r|ddd  }|r||
v rd}|r|t|t|d d qU|S )as  
    Return tuples of line/line number/line type with leading/trailing whitespace and any ignored code features removed

    :param lines: a collection of lines
    :param ignore_comments: if true, any comment in the lines collection is removed from the result
    :param ignore_docstrings: if true, any line that is a docstring is removed from the result
    :param ignore_imports: if true, any line that is an import is removed from the result
    :param ignore_signatures: if true, any line that is part of a function signature is removed from the result
    :return: the collection of line/line number/line type tuples
    r   c                 s   s(    | ]}|j t|tjtjffV  qd S r3   )linenorI   r   Import
ImportFrom)rC   noder)   r)   r*   rE   J  s
    
z!stripped_lines.<locals>.<genexpr>c                 S   s$   i | ]\}}|t d d |D qS )c                 s   s    | ]\}}|V  qd S r3   r)   )rC   _	is_importr)   r)   r*   rE   O  rF   z,stripped_lines.<locals>.<dictcomp>.<genexpr>)all)rC   r
  node_is_import_groupr)   r)   r*   
<dictcomp>N  s    z"stripped_lines.<locals>.<dictcomp>c                 S   s   | d S r   r)   )rj   r)   r)   r*   r   Q  s    z stripped_lines.<locals>.<lambda>r   F	functionstreer2   c                    sL   |j D ] }t|tjtjfr| | t|tjtjtjfr# | | q| S )zLRecursively get all functions including nested in the classes from the tree.)bodyrI   r   FunctionDefAsyncFunctionDefry   ClassDef)r  r  r  _get_functionsr)   r*   r  W  s   


z&stripped_lines.<locals>._get_functionsc                 s   s4    | ]}t |j|jr|jd  jn|jd V  qdS )r   rp   N)rt   r
  r  tolineno)rC   funcr)   r)   r*   rE   k  s    
Nrp   )rV   z"""z'''   zr"""zr'''r   #r   )r#   r"   )astroidparsejoinr  r   r   r   NodeNGsetr   ru   strip
startswithendswithgetsplitry   r!   r    )r@   r   r   r   r   r  node_is_import_by_linenoline_begins_importcurrent_line_is_importr  signature_linesstrippedlines	docstringr
  r   r)   r  r*   rs   6  sx   

rs   c                   @   s   e Zd ZdZ				ddedee dedededed	d
fddZdd Zdd Z	dd Z
dd Zdd Zdd Zedd Zedd Zd
S )rg   z
    Holds and indexes all the lines of a single source file.
    Allows for correspondance between real lines of the source file and stripped ones, which
    are the real ones from which undesired patterns have been removed.
    Frx   r@   r   r   r   r   r2   Nc                 C   s"   || _ || _t|||||| _d S r3   )rx   r   rs   _stripped_lines)r4   rx   r@   r   r   r   r   r)   r)   r*   r5     s
   	

zLineSet.__init__c                 C   s   d| j  dS )Nz<Lineset for >rx   rN   r)   r)   r*   rQ     s   zLineSet.__str__c                 C   s
   t | jS r3   )r   r   rN   r)   r)   r*   __len__     
zLineSet.__len__c                 C   s
   | j | S r3   r/  )r4   r   r)   r)   r*   __getitem__  r3  zLineSet.__getitem__c                 C   s   | j |j k S r3   r1  rd   r)   r)   r*   __lt__  s   zLineSet.__lt__c                 C   s   t | S r3   )idrN   r)   r)   r*   rO     s   zLineSet.__hash__c                 C   s   t |tsdS | j|jkS )NF)rI   rg   __dict__rd   r)   r)   r*   rK     rL   zLineSet.__eq__c                 C   rM   r3   r4  rN   r)   r)   r*   rs     rX   zLineSet.stripped_linesc                 C   rM   r3   )r   rN   r)   r)   r*   
real_lines  rX   zLineSet.real_lines)FFFF)r$   r%   r&   r6   r(   r   rR   r5   rQ   r2  r5  r6  rO   rK   r`   rs   r9  r)   r)   r)   r*   rg     s>    


R0801)zSimilar lines in %s files
%szduplicate-codezIndicates that a set of similar lines has been detected among multiple file. This usually means that the code should be refactored to avoid this duplication.stats	old_statsc                 C   s2   g d}|t ||d7 }| t|dddd dS )z/make a layout with some stats about duplication)r   nowprevious
differencenb_duplicated_linespercent_duplicated_linesr   rp   )childrencolsrheaderscheadersN)r   ry   r   )sectr;  r<  r@   r)   r)   r*   report_similarities  s
   rH  c                
   @   s   e Zd ZdZefZdZeZde	ddddfdd	d
dddfdd	d
dddfddd
dddfddd
dddffZ
ddeffZd'd(ddZd)ddZdd Zdejddfdd Zd!d" Zd#d$ Zd%d& ZdS )*SimilarCheckerzchecks for similarities and duplicated code. This computation may be
    memory / CPU intensive, so you should disable it if you experiment some
    problems.
    r   min-similarity-linesr8   z<int>z%Minimum lines number of a similarity.)defaulttypemetavarhelpignore-commentsTynz<y or n>z4Comments are removed from the similarity computationignore-docstringsz6Docstrings are removed from the similarity computationignore-importsFz3Imports are removed from the similarity computationignore-signaturesz6Signatures are removed from the similarity computationRP0801DuplicationNr2   c                 C   s@   t | | tj| | jj| jj| jj| jj| jjd i | _	d S )N)r   r   r   r   r   )
r   r5   r   configmin_similarity_linesr   r   r   r   r;  )r4   linterr)   r)   r*   r5   "  s   
zSimilarChecker.__init__c                 C   s   t | |||| |dkr| jj| _dS |dkr| jj| _dS |dkr*| jj| _dS |dkr5| jj| _dS |dkr@| jj| _dS dS )zmethod called to set an option (registered in the options list)

        Overridden to report options setting to Similar
        rJ  rO  rQ  rR  rS  N)	r   
set_optionrV  rW  r   r   r   r   r   )r4   optnamerZ   actionoptdictr)   r)   r*   rY  .  s   zSimilarChecker.set_optionc                 C   s   g | _ | jjddd| _dS )z<init the checkers: reset linesets and statistics informationr   r@  N)r   rX  	add_statsr;  rN   r)   r)   r*   open?  s   zSimilarChecker.openr  c                 C   sB   |  }| | jj||j W d   dS 1 sw   Y  dS )zprocess a module

        the module's content is accessible via the stream object

        stream must implement the readlines method
        N)r   r   rX  current_namefile_encoding)r4   r  r   r)   r)   r*   process_moduleF  s   
"zSimilarChecker.process_modulec                 C   s   t dd | jD }d}| j}|  D ]S\}}g }d } }}	|D ]\}}}	|d|j d| d|	 d q!|  |rN|j||	 D ]	}
||
  qD| j	d	t
|d
|fd ||t
|d  7 }q||d< |or|d | |d< dS )zAcompute and display similarities on closing (i.e. end of parsing)c                 s   rA   r3   r   r   r)   r)   r*   rE   R  rF   z'SimilarChecker.close.<locals>.<genexpr>r   Nr   r   r   ]r:  r   )argsrp   rA  r   rB  )rG   r   r;  r   ry   rx   r   r9  r   add_messager   r!  )r4   total
duplicatedr;  r   r   msgrh   r   r   r   r)   r)   r*   closeP  s    "zSimilarChecker.closec                 C   s
   t | S )zPassthru override)r   r  rN   r)   r)   r*   r  e  r\   zSimilarChecker.get_map_datac                 C   sR   t |}| j|_| j|_| j|_| j|_| j|_|  tj||d |	  dS )r  )r  N)
rI  r   r   r   r   r   r^  r   r  rh  )r4   rX  data
recombinedr)   r)   r*   reduce_map_datai  s   zSimilarChecker.reduce_map_datar3   r  )NN)r$   r%   r&   r6   r   __implements__rx   MSGSmsgsr  optionsrH  reportsr5   rY  r^  r   Modulera  rh  r  rk  r)   r)   r)   r*   rI    sb    



0

rI  c                 C   s   |  t|  dS )z-required method to auto register this checkerN)register_checkerrI  )rX  r)   r)   r*   registerx  s   rs  c                 C   s$   t d t   t d t|  dS )z&display command line usage informationz*finds copy pasted blocks in a set of fileszUsage: symilar [-d|--duplicates min_duplicated_lines] [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] [--ignore-signatures] file1...N)r   sysexit)statusr)   r)   r*   usage}  s   rw  c              	   C   s  | du rt jdd } d}d}t}d}d}d}d}t| ||\}}	|D ]0\}
}|
dv r0t|}q#|
dv r8t  q#|
dv r?d	}q#|
d
v rFd	}q#|
dv rMd	}q#|
dv rSd	}q#|	sZtd t|||||}|	D ]}t|dd}||| W d   n1 s}w   Y  qd|	  t 
d dS )z$standalone command line access pointNrp   hdi)rN  zduplicates=rO  rR  rQ  rS  F)z-dz--duplicates)z-hz--help)z-iz--ignore-commentsT)z--ignore-docstrings)z--ignore-imports)z--ignore-signatureszutf-8)r   r   )rt  argvr  r   r8   rw  r   r^  r   r   ru  )ry  s_optsl_optsr   r   r   r   r   optsrc  optvalsimfilenamer   r)   r)   r*   Run  sH   

r  __main__)r   r3   )Mr6   r   	functoolsr   r   rert  collectionsr   r   ior   r   r   r   r   typingr	   r
   r   r   r   r   r   r   r   r   r   r   r   r  r   pylint.checkersr   r   r   pylint.interfacesr   pylint.reporters.ureports.nodesr   pylint.typingr   pylint.utilsr   r  compiler   r8   r   r    r!   HashToIndex_TIndexToLines_Tr  r-   CplIndexToCplLines_Tr+   r,   r9   r	  r   r   r   r   r   r(   rR   rs   total_orderingrg   rm  rH  rI  rs  rw  r  r$   r)   r)   r)   r*   <module>   s   <


%

*,
 
 O
`4
 


-
