From faf78b70fb43a041b8812a4f47571fd0b915c604 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Mon, 27 Apr 2026 16:03:17 +0530 Subject: [PATCH 1/2] fix: fetch VARCHAR UTF-8 collation columns as SQL_C_WCHAR on Windows to prevent lossy ACP conversion On Windows, the ODBC Driver Manager converts SQL_C_CHAR data to the system's ANSI code page (typically CP1252) before delivering it to the application. This is lossy for characters outside CP1252: CJK/Emoji get replaced with '?' (irreversible data loss) and extended Latin characters arrive as CP1252 bytes that fail UTF-8 decoding (returned as raw bytes instead of str). Fix: When charEncoding is 'utf-8' on Windows, fetch VARCHAR/CHAR/LONGVARCHAR columns as SQL_C_WCHAR (UTF-16LE) instead of SQL_C_CHAR. The ODBC driver converts losslessly to UTF-16LE, bypassing the lossy ACP conversion entirely. Changes in ddbc_bindings.cpp: - Add ShouldFetchCharAsWChar() helper (Windows-only, UTF-8 only) - SQLGetData_wrap (fetchone path): fetch VARCHAR as SQL_C_WCHAR when active, decode via PyUnicode_FromWideChar, with LOB streaming fallback - SQLBindColums (batch path): accept charEncoding param, bind VARCHAR into wcharBuffers as SQL_C_WCHAR when active - FetchBatchData: route VARCHAR columns to ProcessWChar dispatcher when active, compute correct fetchBufferSize for WCHAR buffers - FetchMany_wrap/FetchAll_wrap: pass charEncoding to SQLBindColums - Arrow path: unchanged (uses default empty charEncoding, no WCHAR workaround) Not affected: - Linux/macOS (ShouldFetchCharAsWChar always returns false) - Non-UTF-8 encodings (cp1252, latin-1, gbk, etc. use old SQL_C_CHAR path) - NVARCHAR columns (already use SQL_C_WCHAR) - setencoding API (write path, unrelated to fetch) Test: add test_varchar_utf8_collation_unicode_roundtrip covering ASCII, German, Chinese, Japanese, Russian, Greek, Arabic, Emoji, French through fetchone, fetchall, and fetchmany paths. 
--- mssql_python/pybind/ddbc_bindings.cpp | 208 +++++++++++++++++++++----- tests/test_013_encoding_decoding.py | 117 +++++++++++++++ 2 files changed, 286 insertions(+), 39 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 47a2a2554..6cc645acf 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -68,6 +68,26 @@ inline std::string GetEffectiveCharDecoding(const std::string& userEncoding) { #endif } +// Returns true if VARCHAR columns should be fetched as SQL_C_WCHAR (UTF-16LE) +// instead of SQL_C_CHAR to avoid the lossy ACP conversion on Windows. +// +// On Windows, the ODBC driver converts SQL_C_CHAR data from the server's encoding +// to the system's ANSI code page (e.g., CP1252). This is lossy for characters +// outside the ACP range. When the user requests UTF-8 decoding for SQL_CHAR, +// we fetch as SQL_C_WCHAR (UTF-16LE) which the ODBC driver converts losslessly, +// then decode from UTF-16LE to Python str. +// +// On Linux/macOS, the ODBC driver already returns UTF-8 for SQL_C_CHAR based +// on the system locale, so this workaround is not needed. 
+inline bool ShouldFetchCharAsWChar(const std::string& charEncoding) { +#if defined(_WIN32) + return charEncoding == "utf-8" || charEncoding == "UTF-8" || charEncoding == "utf8"; +#else + (void)charEncoding; + return false; +#endif +} + namespace PythonObjectCache { py::object get_time_class(); } @@ -3210,11 +3230,88 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p case SQL_LONGVARCHAR: { if (columnSize == SQL_NO_TOTAL || columnSize == 0 || columnSize > SQL_MAX_LOB_SIZE) { - LOG("SQLGetData: Streaming LOB for column %d (SQL_C_CHAR) " - "- columnSize=%lu", - i, (unsigned long)columnSize); - row.append( - FetchLobColumnData(hStmt, i, SQL_C_CHAR, false, false, charEncoding)); + // LOB path: stream the data + if (ShouldFetchCharAsWChar(charEncoding)) { + // On Windows with UTF-8, fetch LOB VARCHAR as WCHAR to avoid + // lossy ACP conversion + LOG("SQLGetData: Streaming LOB for column %d (SQL_C_WCHAR via " + "UTF-8 workaround) - columnSize=%lu", + i, (unsigned long)columnSize); + row.append( + FetchLobColumnData(hStmt, i, SQL_C_WCHAR, true, false, charEncoding)); + } else { + LOG("SQLGetData: Streaming LOB for column %d (SQL_C_CHAR) " + "- columnSize=%lu", + i, (unsigned long)columnSize); + row.append( + FetchLobColumnData(hStmt, i, SQL_C_CHAR, false, false, charEncoding)); + } + } else if (ShouldFetchCharAsWChar(charEncoding)) { + // On Windows with UTF-8 decoding: fetch VARCHAR as SQL_C_WCHAR + // to bypass the ODBC driver's lossy ACP (e.g. CP1252) conversion. + // The ODBC driver converts losslessly to UTF-16LE for SQL_C_WCHAR. 
+ uint64_t wcharBufSize = (columnSize + 1); // in SQLWCHAR units + std::vector<SQLWCHAR> wdataBuffer(wcharBufSize); + SQLLEN dataLen; + ret = SQLGetData_ptr(hStmt, i, SQL_C_WCHAR, wdataBuffer.data(), + wcharBufSize * sizeof(SQLWCHAR), &dataLen); + if (SQL_SUCCEEDED(ret)) { + if (dataLen > 0) { + uint64_t numCharsInData = dataLen / sizeof(SQLWCHAR); + if (numCharsInData <= columnSize) { +#if defined(_WIN32) + PyObject* pyStr = PyUnicode_FromWideChar( + reinterpret_cast<const wchar_t*>(wdataBuffer.data()), + numCharsInData); +#else + PyObject* pyStr = PyUnicode_DecodeUTF16( + reinterpret_cast<const char*>(wdataBuffer.data()), + numCharsInData * sizeof(SQLWCHAR), NULL, NULL); +#endif + if (pyStr) { + row.append(py::reinterpret_steal<py::object>(pyStr)); + LOG("SQLGetData: CHAR column %d fetched as WCHAR (UTF-8 " + "workaround), %zu bytes -> decoded", + i, (size_t)dataLen); + } else { + PyErr_Clear(); + LOG_ERROR("SQLGetData: Failed to decode WCHAR data for " + "CHAR column %d", + i); + row.append(py::none()); + } + } else { + // Buffer too small, fallback to LOB streaming + LOG("SQLGetData: CHAR column %d WCHAR data truncated, " + "using streaming LOB", + i); + row.append(FetchLobColumnData(hStmt, i, SQL_C_WCHAR, true, false, + charEncoding)); + } + } else if (dataLen == SQL_NULL_DATA) { + LOG("SQLGetData: Column %d is NULL (CHAR via WCHAR)", i); + row.append(py::none()); + } else if (dataLen == 0) { + row.append(py::str("")); + } else if (dataLen == SQL_NO_TOTAL) { + LOG("SQLGetData: SQL_NO_TOTAL for column %d (CHAR via WCHAR), " + "falling back to LOB", + i); + row.append(FetchLobColumnData(hStmt, i, SQL_C_WCHAR, true, false, + charEncoding)); + } else if (dataLen < 0) { + LOG("SQLGetData: Unexpected negative data length " + "for column %d (CHAR via WCHAR) - dataLen=%ld", + i, (long)dataLen); + ThrowStdException("SQLGetData returned an unexpected negative " + "data length"); + } + } else { + LOG("SQLGetData: Error retrieving WCHAR data for CHAR column %d " + "- SQLRETURN=%d, returning NULL", + i, ret); + 
row.append(py::none()); + } } else { // Allocate columnSize * 4 + 1 on ALL platforms (no #if guard). // @@ -3731,9 +3828,13 @@ SQLRETURN SQLFetchScroll_wrap(SqlHandlePtr StatementHandle, SQLSMALLINT FetchOri // For column in the result set, binds a buffer to retrieve column data // TODO: Move to anonymous namespace, since it is not used outside this file +// charEncoding default is "" so callers that don't pass it (e.g. Arrow path) +// will NOT trigger the WCHAR workaround for VARCHAR columns. SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& columnNames, - SQLUSMALLINT numCols, int fetchSize) { + SQLUSMALLINT numCols, int fetchSize, + const std::string& charEncoding = "") { SQLRETURN ret = SQL_SUCCESS; + const bool fetchCharAsWChar = ShouldFetchCharAsWChar(charEncoding); // Bind columns based on their data types for (SQLUSMALLINT col = 1; col <= numCols; col++) { auto columnMeta = columnNames[col - 1].cast<py::dict>(); @@ -3747,29 +3848,41 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column // TODO: handle variable length data correctly. This logic wont // suffice HandleZeroColumnSizeAtFetch(columnSize); - // Use columnSize * 4 + 1 on Linux/macOS to accommodate UTF-8 - // expansion. The ODBC driver returns UTF-8 for SQL_C_CHAR where - // each character can be up to 4 bytes. + if (fetchCharAsWChar) { + // On Windows with UTF-8: bind VARCHAR as SQL_C_WCHAR to + // bypass the ODBC driver's lossy ACP conversion. + uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/; + buffers.wcharBuffers[col - 1].resize(fetchSize * fetchBufferSize); + ret = SQLBindCol_ptr(hStmt, col, SQL_C_WCHAR, + buffers.wcharBuffers[col - 1].data(), + fetchBufferSize * sizeof(SQLWCHAR), + buffers.indicators[col - 1].data()); + } else { + // Use columnSize * 4 + 1 on Linux/macOS to accommodate UTF-8 + // expansion. The ODBC driver returns UTF-8 for SQL_C_CHAR where + // each character can be up to 4 bytes. 
#if defined(__APPLE__) || defined(__linux__) - uint64_t fetchBufferSize = columnSize * 4 + 1 /*null-terminator*/; + uint64_t fetchBufferSize = columnSize * 4 + 1 /*null-terminator*/; #else - uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/; + uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/; #endif - // TODO: For LONGVARCHAR/BINARY types, columnSize is returned as - // 2GB-1 by SQLDescribeCol. So fetchBufferSize = 2GB. - // fetchSize=1 if columnSize>1GB. So we'll allocate a vector of - // size 2GB. If a query fetches multiple (say N) LONG... - // columns, we will have allocated multiple (N) 2GB sized - // vectors. This will make driver very slow. And if the N is - // high enough, we could hit the OS limit for heap memory that - // we can allocate, & hence get a std::bad_alloc. The process - // could also be killed by OS for consuming too much memory. - // Hence this will be revisited in beta to not allocate 2GB+ - // memory, & use streaming instead - buffers.charBuffers[col - 1].resize(fetchSize * fetchBufferSize); - ret = SQLBindCol_ptr(hStmt, col, SQL_C_CHAR, buffers.charBuffers[col - 1].data(), - fetchBufferSize * sizeof(SQLCHAR), - buffers.indicators[col - 1].data()); + // TODO: For LONGVARCHAR/BINARY types, columnSize is returned as + // 2GB-1 by SQLDescribeCol. So fetchBufferSize = 2GB. + // fetchSize=1 if columnSize>1GB. So we'll allocate a vector of + // size 2GB. If a query fetches multiple (say N) LONG... + // columns, we will have allocated multiple (N) 2GB sized + // vectors. This will make driver very slow. And if the N is + // high enough, we could hit the OS limit for heap memory that + // we can allocate, & hence get a std::bad_alloc. The process + // could also be killed by OS for consuming too much memory. 
+ // Hence this will be revisited in beta to not allocate 2GB+ + // memory, & use streaming instead + buffers.charBuffers[col - 1].resize(fetchSize * fetchBufferSize); + ret = SQLBindCol_ptr(hStmt, col, SQL_C_CHAR, + buffers.charBuffers[col - 1].data(), + fetchBufferSize * sizeof(SQLCHAR), + buffers.indicators[col - 1].data()); + } break; } case SQL_WCHAR: @@ -3923,6 +4036,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum bool isLob; }; std::vector columnInfos(numCols); + const bool fetchCharAsWChar = ShouldFetchCharAsWChar(charEncoding); for (SQLUSMALLINT col = 0; col < numCols; col++) { const auto& columnMeta = columnNames[col].cast(); columnInfos[col].dataType = columnMeta["DataType"].cast(); @@ -3931,22 +4045,31 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum std::find(lobColumns.begin(), lobColumns.end(), col + 1) != lobColumns.end(); columnInfos[col].processedColumnSize = columnInfos[col].columnSize; HandleZeroColumnSizeAtFetch(columnInfos[col].processedColumnSize); - // On Linux/macOS, the ODBC driver returns UTF-8 for SQL_C_CHAR where - // each character can be up to 4 bytes. Must match SQLBindColums buffer. -#if defined(__APPLE__) || defined(__linux__) + SQLSMALLINT dt = columnInfos[col].dataType; bool isCharType = (dt == SQL_CHAR || dt == SQL_VARCHAR || dt == SQL_LONGVARCHAR); - if (isCharType) { - columnInfos[col].fetchBufferSize = columnInfos[col].processedColumnSize * 4 + - 1; // *4 for UTF-8, +1 for null terminator - } else { + + if (fetchCharAsWChar && isCharType) { + // When fetching VARCHAR as WCHAR (UTF-8 workaround on Windows), + // fetchBufferSize is in SQLWCHAR units to match SQLBindColums columnInfos[col].fetchBufferSize = columnInfos[col].processedColumnSize + 1; // +1 for null terminator - } + } else { + // On Linux/macOS, the ODBC driver returns UTF-8 for SQL_C_CHAR where + // each character can be up to 4 bytes. Must match SQLBindColums buffer. 
+#if defined(__APPLE__) || defined(__linux__) + if (isCharType) { + columnInfos[col].fetchBufferSize = columnInfos[col].processedColumnSize * 4 + + 1; // *4 for UTF-8, +1 for null terminator + } else { + columnInfos[col].fetchBufferSize = + columnInfos[col].processedColumnSize + 1; // +1 for null terminator + } #else - columnInfos[col].fetchBufferSize = - columnInfos[col].processedColumnSize + 1; // +1 for null terminator + columnInfos[col].fetchBufferSize = + columnInfos[col].processedColumnSize + 1; // +1 for null terminator #endif + } } // Performance: Build function pointer dispatch table (once per batch) @@ -3998,7 +4121,13 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum case SQL_CHAR: case SQL_VARCHAR: case SQL_LONGVARCHAR: - columnProcessors[col] = ColumnProcessors::ProcessChar; + // When fetchCharAsWChar is active, VARCHAR data is in wcharBuffers + // (bound as SQL_C_WCHAR) so use the WCHAR processor for decoding. + if (fetchCharAsWChar) { + columnProcessors[col] = ColumnProcessors::ProcessWChar; + } else { + columnProcessors[col] = ColumnProcessors::ProcessChar; + } break; case SQL_WCHAR: case SQL_WVARCHAR: @@ -4397,7 +4526,7 @@ SQLRETURN FetchMany_wrap(SqlHandlePtr StatementHandle, py::list& rows, int fetch ColumnBuffers buffers(numCols, fetchSize); // Bind columns - ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize); + ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize, charEncoding); if (!SQL_SUCCEEDED(ret)) { LOG("FetchMany_wrap: Error when binding columns - SQLRETURN=%d", ret); return ret; @@ -4745,6 +4874,7 @@ SQLRETURN FetchArrowBatch_wrap( if (!hasLobColumns && fetchSize > 0) { // Bind columns + // Arrow path has no per-connection charEncoding; the default "" disables the WCHAR workaround ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize); if (!SQL_SUCCEEDED(ret)) { LOG("Error when binding columns"); @@ -5573,7 +5703,7 @@ SQLRETURN FetchAll_wrap(SqlHandlePtr StatementHandle, 
py::list& rows, ColumnBuffers buffers(numCols, fetchSize); // Bind columns - ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize); + ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize, charEncoding); if (!SQL_SUCCEEDED(ret)) { LOG("FetchAll_wrap: Error when binding columns - SQLRETURN=%d", ret); return ret; diff --git a/tests/test_013_encoding_decoding.py b/tests/test_013_encoding_decoding.py index 034afae68..937feb6d4 100644 --- a/tests/test_013_encoding_decoding.py +++ b/tests/test_013_encoding_decoding.py @@ -7256,5 +7256,122 @@ def test_dae_encoding_large_string(db_connection): cursor.close() +def test_varchar_utf8_collation_unicode_roundtrip(db_connection): + """Test that VARCHAR columns with UTF-8 collation properly round-trip Unicode data. + + This tests the scenario where a VARCHAR column uses a UTF-8 collation + (e.g., Latin1_General_100_CI_AS_SC_UTF8) which enables storing full Unicode + in VARCHAR. The ODBC driver on Windows converts SQL_C_CHAR data to the + system ANSI code page (e.g., CP1252), which is lossy for non-Latin characters. + The fix fetches such columns as SQL_C_WCHAR (UTF-16LE) to preserve all Unicode. + + Covers: fetchone, fetchall, fetchmany paths. 
+ """ + cursor = db_connection.cursor() + + try: + # Create table with UTF-8 collation on VARCHAR column + cursor.execute(""" + CREATE TABLE #test_varchar_utf8_collation ( + id INT PRIMARY KEY, + varchar_utf8 VARCHAR(200) COLLATE Latin1_General_100_CI_AS_SC_UTF8, + nvarchar_ref NVARCHAR(200) + ) + """) + + # Configure UTF-8 decoding for SQL_CHAR (VARCHAR) + db_connection.setdecoding(SQL_CHAR, encoding="utf-8") + db_connection.setdecoding(SQL_WCHAR, encoding="utf-16le") + + # Test cases covering BMP and supplementary plane characters + test_cases = [ + (1, "Hello World"), # ASCII baseline + (2, "Grüße"), # German - extended Latin (in CP1252 range) + (3, "你好世界"), # Chinese - outside CP1252 + (4, "こんにちは"), # Japanese Hiragana - outside CP1252 + (5, "Привет"), # Russian Cyrillic - outside CP1252 + (6, "Hello 世界"), # Mixed ASCII + CJK + (7, "😀😃😄😁"), # Emoji - supplementary plane (4-byte UTF-8) + (8, "Ελληνικά"), # Greek + (9, "مرحبا"), # Arabic + (10, "café résumé naïve"), # French accented + ] + + # Insert using parameterized queries + for id_val, text in test_cases: + cursor.execute( + "INSERT INTO #test_varchar_utf8_collation (id, varchar_utf8, nvarchar_ref) " + "VALUES (?, ?, ?)", + id_val, text, text, + ) + + # ---- Test fetchone path ---- + for id_val, expected_text in test_cases: + cursor.execute( + "SELECT varchar_utf8, nvarchar_ref FROM #test_varchar_utf8_collation WHERE id = ?", + id_val, + ) + row = cursor.fetchone() + assert row is not None, f"No row returned for id={id_val}" + + varchar_result = row[0] + nvarchar_result = row[1] + + # NVARCHAR should always work (baseline check) + assert nvarchar_result == expected_text, ( + f"NVARCHAR mismatch for id={id_val}: " + f"expected {expected_text!r}, got {nvarchar_result!r}" + ) + + # VARCHAR with UTF-8 collation should also return correct str + assert isinstance(varchar_result, str), ( + f"VARCHAR UTF-8 returned {type(varchar_result).__name__} instead of str " + f"for id={id_val} ({expected_text!r}): got 
{varchar_result!r}" + ) + assert varchar_result == expected_text, ( + f"VARCHAR UTF-8 mismatch for id={id_val}: " + f"expected {expected_text!r}, got {varchar_result!r}" + ) + + # ---- Test fetchall path ---- + cursor.execute( + "SELECT id, varchar_utf8, nvarchar_ref " + "FROM #test_varchar_utf8_collation ORDER BY id" + ) + all_rows = cursor.fetchall() + assert len(all_rows) == len(test_cases), ( + f"fetchall row count mismatch: expected {len(test_cases)}, got {len(all_rows)}" + ) + for row, (expected_id, expected_text) in zip(all_rows, test_cases): + assert row[1] == expected_text, ( + f"fetchall VARCHAR UTF-8 mismatch for id={expected_id}: " + f"expected {expected_text!r}, got {row[1]!r}" + ) + assert row[2] == expected_text, ( + f"fetchall NVARCHAR mismatch for id={expected_id}: " + f"expected {expected_text!r}, got {row[2]!r}" + ) + + # ---- Test fetchmany path ---- + cursor.execute( + "SELECT id, varchar_utf8, nvarchar_ref " + "FROM #test_varchar_utf8_collation ORDER BY id" + ) + many_rows = cursor.fetchmany(5) + assert len(many_rows) == 5, f"fetchmany(5) returned {len(many_rows)} rows" + for row, (expected_id, expected_text) in zip(many_rows, test_cases[:5]): + assert row[1] == expected_text, ( + f"fetchmany VARCHAR UTF-8 mismatch for id={expected_id}: " + f"expected {expected_text!r}, got {row[1]!r}" + ) + + finally: + try: + cursor.execute("DROP TABLE #test_varchar_utf8_collation") + except: + pass + cursor.close() + + if __name__ == "__main__": pytest.main([__file__, "-v"]) From 1e3e0922273ae9ed6f589228016e3d9ba9766067 Mon Sep 17 00:00:00 2001 From: Jahnvi Thakkar Date: Mon, 27 Apr 2026 16:09:38 +0530 Subject: [PATCH 2/2] Formatting test file --- tests/test_013_encoding_decoding.py | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/test_013_encoding_decoding.py b/tests/test_013_encoding_decoding.py index 937feb6d4..867228015 100644 --- a/tests/test_013_encoding_decoding.py +++ 
b/tests/test_013_encoding_decoding.py @@ -7285,16 +7285,16 @@ def test_varchar_utf8_collation_unicode_roundtrip(db_connection): # Test cases covering BMP and supplementary plane characters test_cases = [ - (1, "Hello World"), # ASCII baseline - (2, "Grüße"), # German - extended Latin (in CP1252 range) - (3, "你好世界"), # Chinese - outside CP1252 - (4, "こんにちは"), # Japanese Hiragana - outside CP1252 - (5, "Привет"), # Russian Cyrillic - outside CP1252 - (6, "Hello 世界"), # Mixed ASCII + CJK - (7, "😀😃😄😁"), # Emoji - supplementary plane (4-byte UTF-8) - (8, "Ελληνικά"), # Greek - (9, "مرحبا"), # Arabic - (10, "café résumé naïve"), # French accented + (1, "Hello World"), # ASCII baseline + (2, "Grüße"), # German - extended Latin (in CP1252 range) + (3, "你好世界"), # Chinese - outside CP1252 + (4, "こんにちは"), # Japanese Hiragana - outside CP1252 + (5, "Привет"), # Russian Cyrillic - outside CP1252 + (6, "Hello 世界"), # Mixed ASCII + CJK + (7, "😀😃😄😁"), # Emoji - supplementary plane (4-byte UTF-8) + (8, "Ελληνικά"), # Greek + (9, "مرحبا"), # Arabic + (10, "café résumé naïve"), # French accented ] # Insert using parameterized queries @@ -7302,7 +7302,9 @@ def test_varchar_utf8_collation_unicode_roundtrip(db_connection): cursor.execute( "INSERT INTO #test_varchar_utf8_collation (id, varchar_utf8, nvarchar_ref) " "VALUES (?, ?, ?)", - id_val, text, text, + id_val, + text, + text, ) # ---- Test fetchone path ---- @@ -7335,13 +7337,12 @@ def test_varchar_utf8_collation_unicode_roundtrip(db_connection): # ---- Test fetchall path ---- cursor.execute( - "SELECT id, varchar_utf8, nvarchar_ref " - "FROM #test_varchar_utf8_collation ORDER BY id" + "SELECT id, varchar_utf8, nvarchar_ref " "FROM #test_varchar_utf8_collation ORDER BY id" ) all_rows = cursor.fetchall() - assert len(all_rows) == len(test_cases), ( - f"fetchall row count mismatch: expected {len(test_cases)}, got {len(all_rows)}" - ) + assert len(all_rows) == len( + test_cases + ), f"fetchall row count mismatch: expected 
{len(test_cases)}, got {len(all_rows)}" for row, (expected_id, expected_text) in zip(all_rows, test_cases): assert row[1] == expected_text, ( f"fetchall VARCHAR UTF-8 mismatch for id={expected_id}: " @@ -7354,8 +7355,7 @@ def test_varchar_utf8_collation_unicode_roundtrip(db_connection): # ---- Test fetchmany path ---- cursor.execute( - "SELECT id, varchar_utf8, nvarchar_ref " - "FROM #test_varchar_utf8_collation ORDER BY id" + "SELECT id, varchar_utf8, nvarchar_ref " "FROM #test_varchar_utf8_collation ORDER BY id" ) many_rows = cursor.fetchmany(5) assert len(many_rows) == 5, f"fetchmany(5) returned {len(many_rows)} rows"