o
    g                      @   s  d Z ddlmZ ddlZddlZddlZddlZddlm	Z	m
Z
 ddlmZ dd Zdd Zejd	d
dgejdg ddd Zdd Zdd Zejddi e	ddgifdddie	ddgifdddgie	dddgifddgd d!e	ddgifddgd"d!e	dejdgifgd#d$ Zd%d& Zejd'g d(d)d* Zejd+d d"gd,d- Zd.d/ Zejdg d0d1d2 Zejdg d3d4d5 Zd6d7 Zd8d9 ZdS ):zZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesION)	DataFrameread_csvc                 C   sL   d}| }t d|}|j|d|d}tddggddgd	}t|| d S )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr   parserdataresultexpected r   b/home/ubuntu/cloudmapper/venv/lib/python3.10/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_input   s   r   c                 C   s@   | }t d }|j|ddd d}tddgg}t|| d S )Nu   Łaski, Jan;1;utf-8)r   r   headeru   Łaski, Jan   r   )r   r   r   r   r   r   r   r   test_read_csv_unicode   s
   r   r   ,	r   )utf-16zutf-16lezutf-16bec              	   C   s  | }d d|}dtd d}|dd}d}t|[}d	d
lm} ||}	t|d}
|
|	 W d    n1 s>w   Y  t	||}|||d}|j
|fd|i|}|j
|fd|i|}|  t|| W d    d S 1 s{w   Y  d S )Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r   __
   z__.csv   )r   skiprowsr   r   )TextIOWrapperwbr   r   )replacer   randsensure_cleanior%   r   openwriter   r   closer   )r   r   r   r   r   pathkwargsutf8r%   
bytes_datafbytes_bufferr   r   r   r   r   test_utf16_bom_skiprows(   s,   

"r5   c                 C   s6   t j|d}| }|j|ddd}t|dksJ d S )Nzutf16_ex.txtr    r   )r   r   2   )osr/   joinr   len)r   csv_dir_pathr/   r   r   r   r   r   test_utf16_exampleJ   s   r;   c                 C   sL   t j|d}| }|j|d dd}|d}|d d }d}||ks$J d S )Nunicode_series.csvlatin-1)r   r   r   r   i`  u$   Á köldum klaka (Cold Fever) (1994))r7   r/   r8   r   	set_index)r   r:   r/   r   r   gotr   r   r   r   test_unicode_encodingQ   s   
r@   zdata,kwargs,expectedza
1ar   z"a"
1	quotechar"zb
1namesb1z
1T)rD   skip_blank_linesFc                    sD   | }d d fdd}|j ||fdi|}t|| d S )Nu   ﻿r   c                    s    |   }t|S )N)r   r   )_databom_databomr1   r   r   _encode_data_with_bomv   s   z,test_utf8_bom.<locals>._encode_data_with_bomr   )r   r   r   )r   r   r0   r   r   rL   r   r   rJ   r   test_utf8_bom]   s   rM   c                 C   sL   t dgdgd}| }||}d|}|jt||d}t|| d S )Ng333333@test)mb_num	multibytezmb_num,multibyte
4.8,testr'   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r   r   r   r   r   r   test_read_csv_utf_aliases~   s   

rT   zfile_path,encoding)))r+   r   csvz	test1.csvr   ))r+   r   r   r<   r=   ))r+   r   r   zsauron.SHIFT_JIS.csvshiftjisc                 C   s  | }|| }|j ||d}t||d}| |}	|jrJ W d    n1 s(w   Y  t||	 t|dd}
|j |
|d}	|
jrFJ W d    n1 sPw   Y  t||	 t|ddd}
|j |
|d}	|
jroJ W d    n1 syw   Y  t||	 d S )Nr'   rb)moder   )rX   	buffering)r   r,   closedr   r   )r   r:   	file_pathr   datapathr   fpathr   far   fbr   r   r   test_binary_mode_file_buffers   s$   
r`   pass_encodingc           	      C   s   | }| |}tddgi}tjd|dd$}|d |d |j||r(|nd d}t|| W d    d S 1 s=w   Y  d S )	Nfoobarzw+T)rX   r   return_filelikezfoo
barr   r'   )rQ   r   r   r*   r-   seekr   r   )	r   rR   rS   ra   r   r   r   r3   r   r   r   r   test_encoding_temp_file   s   


"rf   c                 C   s   | }d}|j dkrtd d}d}t||gi}t -}|| d| | |d |j	||d}t
|| |jrBJ W d    d S 1 sMw   Y  d S )	Nz	shift-jispythonz3NamedTemporaryFile does not work with Python engineu	   てすとu   こむ
r   r'   )enginepytestskipr   tempfileNamedTemporaryFiler-   r   re   r   r   r   rZ   )r   r   r   titler   r   r3   r   r   r   r   test_encoding_named_temp_file   s   



"ro   )r   r    z	utf-16-bez	utf-16-lezutf-32c                 C   sR   d}t || }t|d| d}tddgddgdd	ggd
dgd}t|| d S )Nu   a	b
：foo	0
bar	1
baz	2r   )	delimiterr   u   ：foor   rc   r   bazr#   rA   rE   )r   r   r   )r   r   encoded_datar   r   r   r   r   %test_parse_encoded_special_characters   s
   "rs   )r   Nr    r   r=   c                 C   sx   | }t g dg dg dd}t }|j|d|d |j||dd}W d    n1 s/w   Y  t|| d S )	N)Raphael	DonatellozMiguel AngelLeonardo)redpurpleorangeblue)saizbo staffnunchunkkatana)namemaskweaponF)indexr   T)r   
memory_map)r   r   r*   to_csvr   r   )r   r   r   r   filedfr   r   r   test_encoding_memory_map   s   
r   c                 C   s|   | }t dgd d}d|jd< td}|j|dddd	 |j|d
ddd}W d
   n1 s1w   Y  t|| d
S )zO
    Chunk splits a multibyte character with memory_map=True

    GH 43540
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  zbug-gh43540.csvFr   )r   r   r   NTc)r   r   ri   )r   ilocr   r*   r   r   r   )r   r   r   fnamedfrr   r   r    test_chunk_splits_multibyte_char   s   
r   c                 C   s|   | }|j dv rtd t }|d |d ||}W d    n1 s*w   Y  tg dgd}t	
|| d S )N)rg   pyarrowz5SpooledTemporaryFile does only work with the c-engines   abcdr   abcdr   )ri   rj   rk   rl   SpooledTemporaryFiler-   re   r   r   r   r   )r   r   handler   r   r   r   r   test_not_readable  s   




r   )__doc__r+   r   r7   rl   numpynprj   pandasr   r   pandas._testing_testingr   r   r   markparametrizer5   r;   r@   nanrM   rT   r`   rf   ro   rs   r   r   r   r   r   r   r   <module>   sZ    	 





