The following are 30 code examples of pandas.read_stata(). You can vote up the ones you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 6 votes
def test_encoding(self, version):
    """Check latin-1 handling when reading and round-tripping a Stata file.

    Reading or writing with an explicit ``encoding`` is expected to emit a
    ``FutureWarning``; the decoded value must match the default read.
    """
    # GH 4626, proper encoding handling
    raw = read_stata(self.dta_encoding)
    with tm.assert_produces_warning(FutureWarning):
        encoded = read_stata(self.dta_encoding, encoding='latin-1')
    result = encoded.kreis1849[0]

    expected = raw.kreis1849[0]
    assert result == expected
    assert isinstance(result, compat.string_types)

    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(FutureWarning):
            encoded.to_stata(path, write_index=False, version=version,
                             encoding='latin-1')
        reread_encoded = read_stata(path)
        tm.assert_frame_equal(encoded, reread_encoded)
How to convert a python file into an exe... How to convert a python file to an exe and create an installer part 2
Example #2
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 6 votes
def test_dtype_conversion(self):
    """Compare read_stata output with and without ``preserve_dtypes``."""
    expected = self.read_csv(self.csv15)
    expected['byte_'] = expected['byte_'].astype(np.int8)
    expected['int_'] = expected['int_'].astype(np.int16)
    expected['long_'] = expected['long_'].astype(np.int32)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['double_'] = expected['double_'].astype(np.float64)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    # Default read keeps the narrow dtypes assigned above.
    no_conversion = read_stata(self.dta15_117, convert_dates=True)
    tm.assert_frame_equal(expected, no_conversion)

    conversion = read_stata(self.dta15_117, convert_dates=True,
                            preserve_dtypes=False)

    # read_csv types are the same, so only the date column is converted
    expected = self.read_csv(self.csv15)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    tm.assert_frame_equal(expected, conversion)
Example #3
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 6 votes
def test_out_of_range_float(self):
    """Round-trip float32 columns and reject an infinite value on write."""
    float32_info = np.finfo(np.float32)
    original = DataFrame({
        'ColumnOk': [0.0, float32_info.eps, float32_info.max / 10.0],
        'ColumnTooBig': [0.0, float32_info.eps, float32_info.max],
    })
    original.index.name = 'index'
    for col in original:
        original[col] = original[col].astype(np.float32)

    with tm.ensure_clean() as path:
        original.to_stata(path)
        reread = read_stata(path)
        # The comparison below expects ColumnTooBig to come back as float64.
        original['ColumnTooBig'] = original['ColumnTooBig'].astype(
            np.float64)
        tm.assert_frame_equal(original, reread.set_index('index'))

    original.loc[2, 'ColumnTooBig'] = np.inf
    msg = ("Column ColumnTooBig has a maximum value of infinity which"
           " is outside the range supported by Stata")
    with pytest.raises(ValueError, match=msg):
        with tm.ensure_clean() as path:
            original.to_stata(path)
Example #4
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 6 votes
def test_date_parsing_ignores_format_details(self, column):
    """Check that a date column parses the same with or without a display format."""
    # GH 17797
    #
    # Test that display formats are ignored when determining if a numeric
    # column is a date value.
    #
    # All date types are stored as numbers and the format associated with
    # the column denotes both the type of the date and the display format.
    #
    # STATA supports 9 date types which each have distinct units. We test 7
    # of the 9 types, ignoring %tC and %tb. %tC is a variant of %tc that
    # accounts for leap seconds and %tb relies on STATA's business calendar.
    df = read_stata(self.stata_dates)
    unformatted = df.loc[0, column]
    formatted = df.loc[0, column + "_fmt"]
    assert unformatted == formatted
Example #5
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 6 votes
def test_mixed_string_strl(self):
    """Round-trip a long-string column containing None through format 117."""
    # GH 23633
    rows = [
        {'mixed': 'string' * 500, 'number': 0},
        {'mixed': None, 'number': 1},
    ]
    output = pd.DataFrame(rows)
    output.number = output.number.astype('int32')

    with tm.ensure_clean() as path:
        output.to_stata(path, write_index=False, version=117)
        reread = read_stata(path)
        # Missing strings are expected to come back as empty strings.
        expected = output.fillna('')
        tm.assert_frame_equal(reread, expected)

        # Check strl supports all None (null)
        output.loc[:, 'mixed'] = None
        output.to_stata(path, write_index=False, convert_strl=['mixed'],
                        version=117)
        reread = read_stata(path)
        expected = output.fillna('')
        tm.assert_frame_equal(reread, expected)
Example #6
Source file: test_varmax.py | Project: vnpy_crypto | License: MIT License | 6 votes
def setup_class(cls, true, order, trend, error_cov_type, cov_type='approx',
                **kwargs):
    """Fit a VARMAX model on the manufac dataset and store the smoothed results.

    Sets ``cls.true``, ``cls.model`` and ``cls.results``.
    """
    cls.true = true
    # 1960:Q1 - 1982:Q4
    data_file = current_path + os.sep + 'results' + os.sep + 'manufac.dta'
    with open(data_file, 'rb') as test_data:
        dta = pd.read_stata(test_data)
    dta.index = pd.DatetimeIndex(dta.month, freq='MS')
    dta['dlncaputil'] = dta['lncaputil'].diff()
    # NOTE(review): the scraped text differenced 'lncaputil' here too, which
    # would make both endog columns identical; restored to 'lnhours' from the
    # upstream test as recalled - confirm.
    dta['dlnhours'] = dta['lnhours'].diff()

    endog = dta.loc['1972-02-01':, ['dlncaputil', 'dlnhours']]

    # Warnings raised during model construction are captured, not shown.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter('always')
        cls.model = varmax.VARMAX(endog, order=order, trend=trend,
                                  error_cov_type=error_cov_type, **kwargs)

    cls.results = cls.model.smooth(true['params'], cov_type=cov_type)
Example #7
Source file: test_stata.py | Project: vnpy_crypto | License: MIT License | 6 votes
def test_encoding(self, version):
    """Check latin-1 decoding on read and on a write/read round trip."""
    # GH 4626, proper encoding handling
    raw = read_stata(self.dta_encoding)
    encoded = read_stata(self.dta_encoding, encoding="latin-1")
    result = encoded.kreis1849[0]

    if compat.PY3:
        expected = raw.kreis1849[0]
        assert result == expected
        assert isinstance(result, compat.string_types)
    else:
        # On Python 2 the default read is decoded explicitly for comparison.
        expected = raw.kreis1849.str.decode("latin-1")[0]
        assert result == expected
        assert isinstance(result, unicode)  # noqa

    with tm.ensure_clean() as path:
        encoded.to_stata(path, encoding='latin-1',
                         write_index=False, version=version)
        reread_encoded = read_stata(path, encoding='latin-1')
        tm.assert_frame_equal(encoded, reread_encoded)
Example #8
Source file: test_stata.py | Project: vnpy_crypto | License: MIT License | 6 votes
def test_dtype_conversion(self):
    """Compare read_stata output with and without ``preserve_dtypes``."""
    expected = self.read_csv(self.csv15)
    expected['byte_'] = expected['byte_'].astype(np.int8)
    expected['int_'] = expected['int_'].astype(np.int16)
    expected['long_'] = expected['long_'].astype(np.int32)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['double_'] = expected['double_'].astype(np.float64)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    # Default read keeps the narrow dtypes assigned above.
    no_conversion = read_stata(self.dta15_117, convert_dates=True)
    tm.assert_frame_equal(expected, no_conversion)

    conversion = read_stata(self.dta15_117, convert_dates=True,
                            preserve_dtypes=False)

    # read_csv types are the same, so only the date column is converted
    expected = self.read_csv(self.csv15)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    tm.assert_frame_equal(expected, conversion)
Example #9
Source file: test_stata.py | Project: vnpy_crypto | License: MIT License | 6 votes
def test_out_of_range_float(self):
    """Round-trip float32 columns and reject an infinite value on write."""
    float32_info = np.finfo(np.float32)
    original = DataFrame({
        'ColumnOk': [0.0, float32_info.eps, float32_info.max / 10.0],
        'ColumnTooBig': [0.0, float32_info.eps, float32_info.max],
    })
    original.index.name = 'index'
    for col in original:
        original[col] = original[col].astype(np.float32)

    with tm.ensure_clean() as path:
        original.to_stata(path)
        reread = read_stata(path)
        # The comparison below expects ColumnTooBig to come back as float64.
        original['ColumnTooBig'] = original['ColumnTooBig'].astype(
            np.float64)
        tm.assert_frame_equal(original, reread.set_index('index'))

    original.loc[2, 'ColumnTooBig'] = np.inf
    with pytest.raises(ValueError) as cm:
        with tm.ensure_clean() as path:
            original.to_stata(path)
    # The message checks must run after the ``raises`` block: statements
    # placed after the raising call inside it are never executed. pytest
    # exposes the exception as ``cm.value`` (not ``cm.exception``).
    message = str(cm.value)
    assert 'ColumnTooBig' in message
    assert 'infinity' in message
Example #10
Source file: test_stata.py | Project: vnpy_crypto | License: MIT License | 6 votes
def test_date_parsing_ignores_format_details(self, column):
    """Check that a date column parses the same with or without a display format."""
    # GH 17797
    #
    # Test that display formats are ignored when determining if a numeric
    # column is a date value.
    #
    # All date types are stored as numbers and the format associated with
    # the column denotes both the type of the date and the display format.
    #
    # STATA supports 9 date types which each have distinct units. We test 7
    # of the 9 types, ignoring %tC and %tb. %tC is a variant of %tc that
    # accounts for leap seconds and %tb relies on STATA's business calendar.
    df = read_stata(self.stata_dates)
    unformatted = df.loc[0, column]
    formatted = df.loc[0, column + "_fmt"]
    assert unformatted == formatted
Example #11
Source file: stata.py | Project: vnpy_crypto | License: MIT License | 6 votes
def read_stata(filepath_or_buffer, convert_dates=True,
               convert_categoricals=True, encoding=None, index_col=None,
               convert_missing=False, preserve_dtypes=True, columns=None,
               order_categoricals=True, chunksize=None, iterator=False):
    """Read a Stata file, either fully or as a reader for chunked access.

    All options are forwarded unchanged to ``StataReader``.

    Returns
    -------
    The ``StataReader`` itself when ``iterator`` or ``chunksize`` is truthy
    (the caller is then responsible for closing it); otherwise the result of
    ``reader.read()``, with the reader closed before returning.
    """
    reader = StataReader(filepath_or_buffer,
                         convert_dates=convert_dates,
                         convert_categoricals=convert_categoricals,
                         index_col=index_col,
                         convert_missing=convert_missing,
                         preserve_dtypes=preserve_dtypes,
                         columns=columns,
                         order_categoricals=order_categoricals,
                         chunksize=chunksize,
                         encoding=encoding)

    if iterator or chunksize:
        return reader

    # Close the reader even if reading raises, so the file is not leaked.
    try:
        data = reader.read()
    finally:
        reader.close()
    return data
Example #12
Source file: test_stata.py | Project: predictive-maintenance-using-machine-learning | License: Apache 2.0 | 6 votes
def test_encoding(self, version):
    """Check latin-1 handling when reading and round-tripping a Stata file.

    Reading or writing with an explicit ``encoding`` is expected to emit a
    ``FutureWarning``; the decoded value must match the default read.
    """
    # GH 4626, proper encoding handling
    raw = read_stata(self.dta_encoding)
    with tm.assert_produces_warning(FutureWarning):
        encoded = read_stata(self.dta_encoding, encoding='latin-1')
    result = encoded.kreis1849[0]

    expected = raw.kreis1849[0]
    assert result == expected
    assert isinstance(result, compat.string_types)

    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(FutureWarning):
            encoded.to_stata(path, write_index=False, version=version,
                             encoding='latin-1')
        reread_encoded = read_stata(path)
        tm.assert_frame_equal(encoded, reread_encoded)
Example #13
Source file: test_stata.py | Project: predictive-maintenance-using-machine-learning | License: Apache 2.0 | 6 votes
def test_dtype_conversion(self):
    """Compare read_stata output with and without ``preserve_dtypes``."""
    expected = self.read_csv(self.csv15)
    expected['byte_'] = expected['byte_'].astype(np.int8)
    expected['int_'] = expected['int_'].astype(np.int16)
    expected['long_'] = expected['long_'].astype(np.int32)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['double_'] = expected['double_'].astype(np.float64)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    # Default read keeps the narrow dtypes assigned above.
    no_conversion = read_stata(self.dta15_117, convert_dates=True)
    tm.assert_frame_equal(expected, no_conversion)

    conversion = read_stata(self.dta15_117, convert_dates=True,
                            preserve_dtypes=False)

    # read_csv types are the same, so only the date column is converted
    expected = self.read_csv(self.csv15)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    tm.assert_frame_equal(expected, conversion)
Example #14
Source file: test_stata.py | Project: predictive-maintenance-using-machine-learning | License: Apache 2.0 | 6 votes
def test_out_of_range_float(self):
    """Round-trip float32 columns and reject an infinite value on write."""
    float32_info = np.finfo(np.float32)
    original = DataFrame({
        'ColumnOk': [0.0, float32_info.eps, float32_info.max / 10.0],
        'ColumnTooBig': [0.0, float32_info.eps, float32_info.max],
    })
    original.index.name = 'index'
    for col in original:
        original[col] = original[col].astype(np.float32)

    with tm.ensure_clean() as path:
        original.to_stata(path)
        reread = read_stata(path)
        # The comparison below expects ColumnTooBig to come back as float64.
        original['ColumnTooBig'] = original['ColumnTooBig'].astype(
            np.float64)
        tm.assert_frame_equal(original, reread.set_index('index'))

    original.loc[2, 'ColumnTooBig'] = np.inf
    msg = ("Column ColumnTooBig has a maximum value of infinity which"
           " is outside the range supported by Stata")
    with pytest.raises(ValueError, match=msg):
        with tm.ensure_clean() as path:
            original.to_stata(path)
Example #15
Source file: test_stata.py | Project: predictive-maintenance-using-machine-learning | License: Apache 2.0 | 6 votes
def test_date_parsing_ignores_format_details(self, column):
    """Check that a date column parses the same with or without a display format."""
    # GH 17797
    #
    # Test that display formats are ignored when determining if a numeric
    # column is a date value.
    #
    # All date types are stored as numbers and the format associated with
    # the column denotes both the type of the date and the display format.
    #
    # STATA supports 9 date types which each have distinct units. We test 7
    # of the 9 types, ignoring %tC and %tb. %tC is a variant of %tc that
    # accounts for leap seconds and %tb relies on STATA's business calendar.
    df = read_stata(self.stata_dates)
    unformatted = df.loc[0, column]
    formatted = df.loc[0, column + "_fmt"]
    assert unformatted == formatted
Example #16
Source file: test_stata.py | Project: predictive-maintenance-using-machine-learning | License: Apache 2.0 | 6 votes
def test_mixed_string_strl(self):
    """Round-trip a long-string column containing None through format 117."""
    # GH 23633
    rows = [
        {'mixed': 'string' * 500, 'number': 0},
        {'mixed': None, 'number': 1},
    ]
    output = pd.DataFrame(rows)
    output.number = output.number.astype('int32')

    with tm.ensure_clean() as path:
        output.to_stata(path, write_index=False, version=117)
        reread = read_stata(path)
        # Missing strings are expected to come back as empty strings.
        expected = output.fillna('')
        tm.assert_frame_equal(reread, expected)

        # Check strl supports all None (null)
        output.loc[:, 'mixed'] = None
        output.to_stata(path, write_index=False, convert_strl=['mixed'],
                        version=117)
        reread = read_stata(path)
        expected = output.fillna('')
        tm.assert_frame_equal(reread, expected)
Example #17
Source file: test_liml.py | Project: econtools | License: BSD 3-Clause "New" or "Revised" License | 6 votes
def setup_class(cls):
    """Stata reg output from `sysuse auto; reg price mpg`"""
    cls.init(cls)
    test_path = path.split(path.relpath(__file__))[0]
    auto_path = path.join(test_path, 'data', 'auto.dta')
    autodata = pd.read_stata(auto_path)
    # Column names of the Stata "auto" dataset (restored from translated text).
    y = 'price'
    x_end = ['mpg', 'length']
    z = ['trunk', 'weight', 'headroom']
    x_exog = []
    nosingles = True
    cls.result = ivreg(autodata, y, x_end, z, x_exog,
                       addcons=True,
                       iv_method='liml',
                       nosingles=nosingles)
    cls.expected = liml_std
Example #18
Source file: test_liml.py | Project: econtools | License: BSD 3-Clause "New" or "Revised" License | 6 votes
def setup_class(cls):
    """Stata reg output from `sysuse auto; reg price mpg`"""
    cls.init(cls)
    # Per-statistic comparison precision overrides for the robust-VCE case.
    cls.precision['se'] = 0
    cls.precision['CI_low'] = 0
    cls.precision['CI_high'] = -1
    test_path = path.split(path.relpath(__file__))[0]
    auto_path = path.join(test_path, 'data', 'auto.dta')
    autodata = pd.read_stata(auto_path)
    # Column names of the Stata "auto" dataset (restored from translated text).
    y = 'price'
    x_end = ['mpg', 'length']
    z = ['trunk', 'weight', 'headroom']
    x_exog = []
    nosingles = True
    cls.result = ivreg(autodata, y, x_end, z, x_exog,
                       addcons=True,
                       iv_method='liml',
                       vce_type='robust',
                       nosingles=nosingles)
    cls.expected = liml_robust
Example #19
Source file: test_liml.py | Project: econtools | License: BSD 3-Clause "New" or "Revised" License | 6 votes
def setup_class(cls):
    """Stata reg output from `sysuse auto; reg price mpg`"""
    cls.init(cls)
    # Per-statistic comparison precision overrides for the clustered case.
    cls.precision['se'] = 0
    cls.precision['CI_low'] = 0
    cls.precision['CI_high'] = 0
    test_path = path.split(path.relpath(__file__))[0]
    auto_path = path.join(test_path, 'data', 'auto.dta')
    autodata = pd.read_stata(auto_path)
    # Column names of the Stata "auto" dataset (restored from translated text).
    y = 'price'
    x_end = ['mpg', 'length']
    z = ['trunk', 'weight', 'headroom']
    x_exog = []
    nosingles = True
    cls.result = ivreg(autodata, y, x_end, z, x_exog,
                       addcons=True,
                       iv_method='liml',
                       cluster='gear_ratio',
                       nosingles=nosingles)
    cls.expected = liml_cluster
Example #20
Source file: test_liml.py | Project: econtools | License: BSD 3-Clause "New" or "Revised" License | 6 votes
def setup_class(cls):
    """Stata reg output from `sysuse auto; reg price mpg`"""
    cls.init(cls)
    test_path = path.split(path.relpath(__file__))[0]
    auto_path = path.join(test_path, 'data', 'auto.dta')
    autodata = pd.read_stata(auto_path)
    # Column names of the Stata "auto" dataset (restored from translated text).
    y = 'price'
    x_end = ['mpg', 'length']
    z = ['weight', 'trunk']
    x_exog = []
    nosingles = True
    # LIML result is compared against the stored 2SLS cluster output.
    cls.result = ivreg(autodata, y, x_end, z, x_exog,
                       addcons=True,
                       iv_method='liml',
                       cluster='gear_ratio',
                       nosingles=nosingles)
    cls.expected = tsls_cluster
Example #21
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def analizado_114(rutadirección):
    """Read ``stata5_114.dta`` from the given directory with dates converted.

    The returned frame's index is named ``'index'``.
    """
    # NOTE(review): the function and parameter names look machine-translated
    # (likely ``parsed_114(dirpath)`` upstream); kept as-is to preserve the
    # visible interface - confirm against the original project.
    dta14_114 = os.path.join(rutadirección, 'stata5_114.dta')
    parsed = read_stata(dta14_114, convert_dates=True)
    parsed.index.name = 'index'
    return parsed
Example #22
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def read_dta(self, file):
    """Read a Stata file with date conversion enabled."""
    # Legacy default reader configuration
    return read_stata(file, convert_dates=True)
Example #23
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def test_read_empty_dta(self, version):
    """Round-trip a DataFrame with one column and zero rows."""
    empty_ds = DataFrame(columns=['unit'])
    # GH 7369, make sure can read a 0-obs dta file
    with tm.ensure_clean() as path:
        empty_ds.to_stata(path, write_index=False, version=version)
        empty_ds2 = read_stata(path)
        tm.assert_frame_equal(empty_ds, empty_ds2)
Example #24
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def test_105(self):
    """Read ``S4_EDUC1.dta`` and compare its first three rows to known values."""
    # Data obtained from:
    # http://go.worldbank.org/ZXY29PVJ21
    dpath = os.path.join(self.dirpath, 'S4_EDUC1.dta')
    df = pd.read_stata(dpath)
    rows = [[1, 1, 3, -2], [2, 1, 2, -2], [4, 1, 1, -2]]
    df0 = pd.DataFrame(rows)
    df0.columns = ["clustnum", "pri_schl", "psch_num", "psch_dis"]
    df0['clustnum'] = df0["clustnum"].astype(np.int16)
    df0['pri_schl'] = df0["pri_schl"].astype(np.int8)
    df0['psch_num'] = df0["psch_num"].astype(np.int8)
    df0['psch_dis'] = df0["psch_dis"].astype(np.float32)
    tm.assert_frame_equal(df.head(3), df0)
Example #25
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def test_drop_column(self):
    """Check the ``columns`` option: subset, reorder, duplicates, unknown names."""
    expected = self.read_csv(self.csv15)
    expected['byte_'] = expected['byte_'].astype(np.int8)
    expected['int_'] = expected['int_'].astype(np.int16)
    expected['long_'] = expected['long_'].astype(np.int32)
    expected['float_'] = expected['float_'].astype(np.float32)
    expected['double_'] = expected['double_'].astype(np.float64)
    expected['date_td'] = expected['date_td'].apply(
        datetime.strptime, args=('%Y-%m-%d',))

    columns = ['byte_', 'int_', 'long_']
    expected = expected[columns]
    dropped = read_stata(self.dta15_117, convert_dates=True,
                         columns=columns)
    tm.assert_frame_equal(expected, dropped)

    # See PR 10757
    columns = ['int_', 'long_', 'byte_']
    expected = expected[columns]
    reordered = read_stata(self.dta15_117, convert_dates=True,
                           columns=columns)
    tm.assert_frame_equal(expected, reordered)

    msg = "columns contains duplicate entries"
    with pytest.raises(ValueError, match=msg):
        columns = ['byte_', 'byte_']
        read_stata(self.dta15_117, convert_dates=True, columns=columns)

    msg = ("The following columns were not found in the Stata data set:"
           " not_found")
    with pytest.raises(ValueError, match=msg):
        columns = ['byte_', 'int_', 'long_', 'not_found']
        read_stata(self.dta15_117, convert_dates=True, columns=columns)
Example #26
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def test_categorical_order(self, file):
    """Check that categorical columns keep the expected codes and categories."""
    # Directly construct using expected codes
    # Format is is_cat, col_name, labels (in order), underlying data
    # NOTE(review): 'noorder' and 'floating' are restored from translated
    # text ('no order', 'float') - confirm against the fixture's columns.
    letters = ['a', 'b', 'c', 'd', 'e']
    expected = [
        (True, 'ordered', letters, np.arange(5)),
        (True, 'reverse', letters, np.arange(5)[::-1]),
        (True, 'noorder', letters, np.array([2, 1, 4, 0, 3])),
        (True, 'floating', letters, np.arange(0, 5)),
        (True, 'float_missing', ['a', 'd', 'e'],
         np.array([0, 1, 2, -1, -1])),
        (False, 'nolabel', [1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
        (True, 'int32_mixed', ['d', 2, 'e', 'b', 'a'], np.arange(5)),
    ]
    cols = []
    for is_cat, col, labels, codes in expected:
        if is_cat:
            cols.append((col, pd.Categorical.from_codes(codes, labels)))
        else:
            cols.append((col, pd.Series(labels, dtype=np.float32)))
    expected = DataFrame.from_dict(OrderedDict(cols))

    # Read with and without categoricals, ensure order is identical
    file = getattr(self, file)
    parsed = read_stata(file)
    tm.assert_frame_equal(expected, parsed, check_categorical=False)

    # Check identity of codes
    for col in expected:
        if is_categorical_dtype(expected[col]):
            tm.assert_series_equal(expected[col].cat.codes,
                                   parsed[col].cat.codes)
            tm.assert_index_equal(expected[col].cat.categories,
                                  parsed[col].cat.categories)
Example #27
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def test_categorical_sorting(self, file):
    """Check that sorting a categorical column orders by code, not by label."""
    parsed = read_stata(getattr(self, file))

    # Sort based on codes, not strings
    parsed = parsed.sort_values("srh", na_position='first')

    # Don't sort index: replace it with a fresh 0..n-1 range
    parsed.index = np.arange(parsed.shape[0])
    codes = [-1, -1, 0, 1, 1, 1, 2, 2, 3, 4]
    # NOTE(review): "Poor" restored from translated "Bad" - confirm.
    categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
    cat = pd.Categorical.from_codes(codes=codes, categories=categories)
    expected = pd.Series(cat, name='srh')
    tm.assert_series_equal(expected, parsed["srh"],
                           check_categorical=False)
Example #28
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def test_read_chunks_117(self, file, chunksize,
                         convert_categoricals, convert_dates):
    """Compare chunked reads against slices of a full read of the same file."""
    fname = getattr(self, file)

    # Read the whole file; any warnings are captured rather than shown.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        parsed = read_stata(
            fname,
            convert_categoricals=convert_categoricals,
            convert_dates=convert_dates)

    itr = read_stata(
        fname, iterator=True,
        convert_categoricals=convert_categoricals,
        convert_dates=convert_dates)

    # Close the reader even when an assertion fails part-way through.
    try:
        pos = 0
        for _ in range(5):
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("always")
                try:
                    chunk = itr.read(chunksize)
                except StopIteration:
                    break
            from_frame = parsed.iloc[pos:pos + chunksize, :]
            tm.assert_frame_equal(
                from_frame, chunk, check_dtype=False,
                check_datetimelike_compat=True,
                check_categorical=False)

            pos += chunksize
    finally:
        itr.close()
Example #29
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def test_iterator(self):
    """Exercise the reader returned for ``iterator=True`` and ``chunksize``."""
    fname = self.dta3_117

    parsed = read_stata(fname)
    first_five = parsed.iloc[0:5, :]

    with read_stata(fname, iterator=True) as itr:
        chunk = itr.read(5)
        tm.assert_frame_equal(first_five, chunk)

    with read_stata(fname, chunksize=5) as itr:
        chunk = list(itr)
        tm.assert_frame_equal(first_five, chunk[0])

    with read_stata(fname, iterator=True) as itr:
        chunk = itr.get_chunk(5)
        tm.assert_frame_equal(first_five, chunk)

    with read_stata(fname, chunksize=5) as itr:
        chunk = itr.get_chunk()
        tm.assert_frame_equal(first_five, chunk)

    # GH12153: concatenating all chunks must reproduce the full frame
    with read_stata(fname, chunksize=4) as itr:
        from_chunks = pd.concat(itr)
    tm.assert_frame_equal(parsed, from_chunks)
Example #30
Source file: test_stata.py | Project: recruit | License: Apache 2.0 | 5 votes
def test_read_chunks_115(self, file, chunksize,
                         convert_categoricals, convert_dates):
    """Compare chunked reads against slices of a full read of the same file."""
    fname = getattr(self, file)

    # Read the whole file
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        parsed = read_stata(
            fname,
            convert_categoricals=convert_categoricals,
            convert_dates=convert_dates)

    # Compare to what we get when reading by chunk
    itr = read_stata(
        fname, iterator=True,
        convert_dates=convert_dates,
        convert_categoricals=convert_categoricals)

    # Close the reader even when an assertion fails part-way through.
    try:
        pos = 0
        for _ in range(5):
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("always")
                try:
                    chunk = itr.read(chunksize)
                except StopIteration:
                    break
            from_frame = parsed.iloc[pos:pos + chunksize, :]
            tm.assert_frame_equal(
                from_frame, chunk, check_dtype=False,
                check_datetimelike_compat=True,
                check_categorical=False)

            pos += chunksize
    finally:
        itr.close()