diff --git a/lib/offline/KillSwitchManager.hpp b/lib/offline/KillSwitchManager.hpp index 6eda4b1be..284c6eefa 100644 --- a/lib/offline/KillSwitchManager.hpp +++ b/lib/offline/KillSwitchManager.hpp @@ -7,9 +7,12 @@ #include "pal/PAL.hpp" +#include #include -#include #include +#include +#include +#include #include @@ -39,8 +42,8 @@ namespace MAT_NS_BEGIN { std::string timeStr = headers.get("Retry-After"); if (!timeStr.empty()) { - int64_t timeinSecs = std::stoi(timeStr); - if (timeinSecs > 0) + int64_t timeinSecs = 0; + if (tryParseSeconds(timeStr, timeinSecs) && timeinSecs > 0) { std::lock_guard guard(m_lock); m_retryAfterExpiryTime = PAL::getUtcSystemTime() + timeinSecs; @@ -64,6 +67,13 @@ namespace MAT_NS_BEGIN { // Strip suffix and assume ':all' events of that tenant are killed token.erase(pos, token.length() - pos); } + // Ignore kill-tokens containing control characters; tenant + // tokens are otherwise opaque and safe (the DELETE is + // parameterized), so they must remain killable. + if (!isValidTenantToken(token)) + { + continue; + } killtokensVector.push_back(token); } @@ -71,7 +81,7 @@ namespace MAT_NS_BEGIN { std::string timeString = headers.get("kill-duration"); if (!timeString.empty()) { - timeinSecs = std::stoi(timeString); + tryParseSeconds(timeString, timeinSecs); } if (killtokensVector.size() > 0 && timeinSecs > 0) @@ -156,6 +166,86 @@ namespace MAT_NS_BEGIN { } private: + // Parse a count of seconds from a response-header value (Retry-After / + // kill-duration). Returns false when the value is malformed or out of + // range instead of letting std::stoll throw: the worker thread that drives + // handleResponse has no exception guard, so a throw here would crash the + // process. A standards-compliant server may legitimately send Retry-After + // as an HTTP-date, which is non-numeric and must be ignored. + // + // RFC 7231 delay-seconds is 1*DIGIT, optionally surrounded by HTTP optional + // whitespace (OWS = SP / HTAB). We trim only OWS and require the remaining + // value to be all digits. This rejects malformed inputs that std::stoll + // would otherwise accept -- a leading '+'/'-' sign, or leading CR/LF and + // other control whitespace that stoll silently skips -- and keeps leading + // and trailing handling symmetric. + static bool tryParseSeconds(const std::string& value, int64_t& outSeconds) + { + outSeconds = 0; + size_t begin = 0; + size_t end = value.size(); + while (begin < end && (value[begin] == ' ' || value[begin] == '\t')) + { + ++begin; + } + while (end > begin && (value[end - 1] == ' ' || value[end - 1] == '\t')) + { + --end; + } + if (begin == end) + { + return false; + } + for (size_t i = begin; i < end; ++i) + { + if (value[i] < '0' || value[i] > '9') + { + return false; + } + } + try + { + // Only std::out_of_range can fire now (the substring is all digits); + // catching it ignores an over-large value rather than crashing. + const long long parsed = std::stoll(value.substr(begin, end - begin)); + // Clamp to a value that cannot overflow when later added to a current + // UTC timestamp (seconds) to compute an expiry time. No legitimate + // Retry-After / kill-duration approaches this; an absurd value is + // capped instead of wrapping the expiry into the past. + const int64_t kMaxSeconds = 100LL * 365 * 24 * 60 * 60; // ~100 years + outSeconds = (parsed > kMaxSeconds) ? kMaxSeconds : static_cast(parsed); + return true; + } + catch (const std::exception&) + { + return false; + } + } + + // Tenant tokens are opaque strings elsewhere in the SDK (they may contain + // spaces, quotes, etc.), and the offline-storage DELETE is parameterized, + // so any value is safe to act on. Only reject genuinely unsafe content: + // control characters (including CR/LF, which could enable log injection) + // and over-long values. Over-restricting here would prevent a legitimately + // stored tenant token from ever being killed. + static bool isValidTenantToken(const std::string& token) + { + if (token.empty() || token.size() > 256) + { + return false; + } + for (unsigned char c : token) + { + // Reject C0 control characters and DEL; allow any other byte + // (printable ASCII and UTF-8 sequences). + if (c < 0x20 || c == 0x7f) + { + return false; + } + } + return true; + } + std::map m_tokenTime; std::mutex m_lock; bool m_isRetryAfterActive; diff --git a/lib/offline/OfflineStorage_SQLite.cpp b/lib/offline/OfflineStorage_SQLite.cpp index 0ce28278e..b9b2ed83d 100644 --- a/lib/offline/OfflineStorage_SQLite.cpp +++ b/lib/offline/OfflineStorage_SQLite.cpp @@ -12,6 +12,7 @@ #include #include #include +#include namespace MAT_NS_BEGIN { @@ -398,7 +399,6 @@ namespace MAT_NS_BEGIN { void OfflineStorage_SQLite::DeleteRecords(const std::map & whereFilter) { - UNREFERENCED_PARAMETER(whereFilter); if (!isOpen()) { return; } @@ -413,40 +413,100 @@ namespace MAT_NS_BEGIN { return; } #endif - auto formatter = [&](const std::map & whereFilter) + // SECURITY: build a parameterized statement. Column names are taken + // from a fixed whitelist (never from server/response data) and every + // value is bound via sqlite3_bind_* instead of being concatenated into + // the SQL text. This prevents untrusted values (for example kill-token + // tenant ids carried in a collector response) from altering the + // statement or appending additional "stacked" statements. + enum class ColumnType { Text, Integer }; + static const std::map kAllowedColumns = { + { "record_id", ColumnType::Text }, + { "tenant_token", ColumnType::Text }, + { "latency", ColumnType::Integer }, + { "persistence", ColumnType::Integer }, + { "retry_count", ColumnType::Integer }, + }; + + std::string clause; + std::vector::const_iterator> boundColumns; + for (auto it = whereFilter.begin(); it != whereFilter.end(); ++it) { - std::string clause; - for (const auto &kv : whereFilter) + if (kAllowedColumns.find(it->first) == kAllowedColumns.end()) { - bool quotes = false; - if ((kv.first == "record_id") || - (kv.first == "tenant_token")) + // Fail closed: an unrecognized column cannot be honored, so + // delete nothing rather than running a looser predicate. This + // matches MemoryStorage, whose matcher treats an unknown column + // as "no match". + LOG_WARN("DeleteRecords: unrecognized filter column '%s'; nothing deleted", it->first.c_str()); + return; + } + clause += clause.empty() ? "" : " AND "; + clause += it->first; + clause += "=?"; + boundColumns.push_back(it); + } + + // Never run a DELETE with no predicate, which would erase the table. + if (clause.empty()) + { + LOG_WARN("DeleteRecords: no recognized filter columns; nothing deleted"); + return; + } + + const std::string sql = "DELETE FROM " TABLE_NAME_EVENTS " WHERE " + clause; + SqliteStatement stmt(*m_db, sql.c_str()); + if (stmt.handle() == nullptr) + { + LOG_ERROR("DeleteRecords: failed to prepare delete statement for table " TABLE_NAME_EVENTS ": %s", + g_sqlite3Proxy->sqlite3_errmsg(*m_db)); + return; + } + int idx = 1; + for (const auto& it : boundColumns) + { + const std::string& value = it->second; + int rc = SQLITE_OK; + if (kAllowedColumns.at(it->first) == ColumnType::Text) + { + rc = g_sqlite3Proxy->sqlite3_bind_text(stmt.handle(), idx, + value.data(), static_cast(value.size()), SQLITE_STATIC); + } + else + { + int64_t numeric = 0; + size_t consumed = 0; + try { - // string types - quotes = true; - } - else if ( - // integer types - (kv.first == "latency") || - (kv.first == "persistence") || - (kv.first == "retry_count")) + numeric = static_cast(std::stoll(value, &consumed)); + } + catch (const std::exception&) { - quotes = false; + consumed = 0; } - if (!clause.empty()) + // Treat a non-numeric value for an integer column as an invalid + // filter and abort, rather than coercing to 0 and deleting rows + // that happen to match 0. + if (value.empty() || consumed != value.size()) { - clause += " AND "; + LOG_WARN("DeleteRecords: invalid numeric filter value for column '%s'; nothing deleted", + it->first.c_str()); + return; } - clause += kv.first; - clause += "="; - clause += (quotes) ? - ("\"" + kv.second + "\"") : - kv.second; + rc = g_sqlite3Proxy->sqlite3_bind_int64(stmt.handle(), idx, numeric); } - return clause; - }; - std::string sql = "DELETE FROM " TABLE_NAME_EVENTS " WHERE "; - Execute(sql + formatter(whereFilter)); + if (rc != SQLITE_OK) + { + LOG_ERROR("DeleteRecords: failed to bind filter column '%s': %d (%s)", + it->first.c_str(), rc, g_sqlite3Proxy->sqlite3_errmsg(*m_db)); + return; + } + ++idx; + } + if (!stmt.execute()) + { + LOG_ERROR("DeleteRecords: failed to execute delete for table " TABLE_NAME_EVENTS); + } } } diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt index 8b52a5fb3..679bcd511 100644 --- a/tests/unittests/CMakeLists.txt +++ b/tests/unittests/CMakeLists.txt @@ -27,6 +27,7 @@ set(SRCS HttpResponseDecoderTests.cpp HttpServerTests.cpp InformationProviderImplTests.cpp + KillSwitchManagerTests.cpp LoggerTests.cpp LogManagerImplTests.cpp LogSessionDataTests.cpp diff --git a/tests/unittests/KillSwitchManagerTests.cpp b/tests/unittests/KillSwitchManagerTests.cpp new file mode 100644 index 000000000..307e58b73 --- /dev/null +++ b/tests/unittests/KillSwitchManagerTests.cpp @@ -0,0 +1,157 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +#include "common/Common.hpp" +#include "offline/KillSwitchManager.hpp" + +using namespace Microsoft::Applications::Events; + +TEST(KillSwitchManagerTests, handleResponse_ValidRetryAfter_ActivatesRetryAfter) +{ + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "120"); + manager.handleResponse(headers); + ASSERT_TRUE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_NonNumericRetryAfter_DoesNotThrowAndIsIgnored) +{ + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "not-a-number"); + ASSERT_NO_THROW(manager.handleResponse(headers)); + ASSERT_FALSE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_HttpDateRetryAfter_DoesNotThrowAndIsIgnored) +{ + // RFC 7231 permits Retry-After as an HTTP-date. It is non-numeric and must + // be ignored rather than crash the worker thread. + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "Wed, 21 Oct 2025 07:28:00 GMT"); + ASSERT_NO_THROW(manager.handleResponse(headers)); + ASSERT_FALSE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_OutOfRangeRetryAfter_DoesNotThrowAndIsIgnored) +{ + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "999999999999999999999999999999"); + ASSERT_NO_THROW(manager.handleResponse(headers)); + ASSERT_FALSE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_RetryAfterWithTrailingGarbage_IsIgnored) +{ + // A numeric prefix followed by trailing garbage (e.g. "120; foo") must be + // treated as malformed, not parsed as 120. + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "120; foo"); + ASSERT_NO_THROW(manager.handleResponse(headers)); + ASSERT_FALSE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_RetryAfterWithTrailingWhitespace_IsAccepted) +{ + // Trailing whitespace is allowed (HTTP OWS) and must still parse. + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "120 "); + manager.handleResponse(headers); + ASSERT_TRUE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_RetryAfterWithTrailingCRLF_IsIgnored) +{ + // CR/LF are not HTTP OWS (only SP / HTAB is); a value carrying them is + // malformed and must be ignored rather than parsed from its numeric prefix. + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "120\r\n"); + manager.handleResponse(headers); + ASSERT_FALSE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_RetryAfterWithLeadingWhitespace_IsAccepted) +{ + // Leading HTTP OWS (SP / HTAB) is trimmed; the value still parses. + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", " 120"); + manager.handleResponse(headers); + ASSERT_TRUE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_RetryAfterWithLeadingSign_IsIgnored) +{ + // RFC 7231 delay-seconds is 1*DIGIT; a leading '+'/'-' sign (which std::stoll + // would accept) is malformed and must be ignored. + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "+120"); + manager.handleResponse(headers); + ASSERT_FALSE(manager.isRetryAfterActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_HugeRetryAfter_IsClampedNotWrapped) +{ + // A huge (still in-range) Retry-After must be clamped so the expiry time does + // not overflow and wrap into the past; the back-off must stay active. + KillSwitchManager manager; + HttpHeaders headers; + headers.add("Retry-After", "9000000000000000000"); // ~9e18, near INT64_MAX + manager.handleResponse(headers); + ASSERT_TRUE(manager.isRetryAfterActive()); + ASSERT_TRUE(manager.isTokenBlocked("any-token")); // far-future expiry, not wrapped +} + +TEST(KillSwitchManagerTests, handleResponse_ValidKillTokenAndDuration_BlocksToken) +{ + KillSwitchManager manager; + HttpHeaders headers; + headers.add("kill-tokens", "tenant-token-1"); + headers.add("kill-duration", "300"); + ASSERT_TRUE(manager.handleResponse(headers)); + ASSERT_TRUE(manager.isTokenBlocked("tenant-token-1")); +} + +TEST(KillSwitchManagerTests, handleResponse_NonNumericKillDuration_DoesNotThrowAndDoesNotBlock) +{ + KillSwitchManager manager; + HttpHeaders headers; + headers.add("kill-tokens", "tenant-token-1"); + headers.add("kill-duration", "forever"); + ASSERT_NO_THROW(manager.handleResponse(headers)); + ASSERT_FALSE(manager.isTokenBlocked("tenant-token-1")); +} + +TEST(KillSwitchManagerTests, handleResponse_KillTokenWithControlChars_IsRejected) +{ + // A token containing control characters (e.g. CR/LF) is unsafe -- it could + // enable log injection -- and must be ignored. + KillSwitchManager manager; + HttpHeaders headers; + headers.add("kill-tokens", std::string("tenant\r\ninjected")); + headers.add("kill-duration", "300"); + ASSERT_NO_THROW(manager.handleResponse(headers)); + ASSERT_FALSE(manager.isActive()); +} + +TEST(KillSwitchManagerTests, handleResponse_OpaqueKillTokenWithUnusualChars_IsBlocked) +{ + // Tenant tokens are opaque (they may contain spaces/quotes), and the + // offline-storage DELETE is parameterized, so such a token is safe to act on + // and must remain killable -- over-restricting would let it escape kill. + KillSwitchManager manager; + HttpHeaders headers; + const std::string token = "x\" OR 1=1; DROP TABLE events; --"; + headers.add("kill-tokens", token); + headers.add("kill-duration", "300"); + ASSERT_TRUE(manager.handleResponse(headers)); + ASSERT_TRUE(manager.isTokenBlocked(token)); +} diff --git a/tests/unittests/OfflineStorageTests_SQLite.cpp b/tests/unittests/OfflineStorageTests_SQLite.cpp index eacf45bee..fdbb3fbe6 100644 --- a/tests/unittests/OfflineStorageTests_SQLite.cpp +++ b/tests/unittests/OfflineStorageTests_SQLite.cpp @@ -183,6 +183,54 @@ TEST_F(OfflineStorageTests_SQLite, DeletedRecordsAreNotReturned) EXPECT_THAT(consumer.records[0].id, StrEq("guid2")); } +TEST_F(OfflineStorageTests_SQLite, DeleteRecordsRejectsTenantTokenSqlInjection) +{ + initializeStorage(); + ASSERT_THAT(offlineStorage->StoreRecord({"guid1", "tokenA", EventLatency_Normal, EventPersistence_Normal, 1, {}}), true); + ASSERT_THAT(offlineStorage->StoreRecord({"guid2", "tokenB", EventLatency_Normal, EventPersistence_Normal, 1, {}}), true); + + // A malicious kill-token value attempting stacked SQL (OR 1=1 / DROP TABLE). + // It must be treated as a literal tenant_token value that matches nothing. + const std::string malicious = "x\" OR 1=1; DROP TABLE events; --"; + offlineStorage->DeleteRecords({{"tenant_token", malicious}}); + + // The events table must still exist and no rows should have been deleted. + TestRecordConsumer consumer; + EXPECT_THAT(offlineStorage->GetAndReserveRecords(consumer, 100000), true); + ASSERT_THAT(consumer.records.size(), 2); +} + +TEST_F(OfflineStorageTests_SQLite, DeleteRecordsByTenantTokenDeletesOnlyMatching) +{ + initializeStorage(); + ASSERT_THAT(offlineStorage->StoreRecord({"guid1", "tokenA", EventLatency_Normal, EventPersistence_Normal, 1, {}}), true); + ASSERT_THAT(offlineStorage->StoreRecord({"guid2", "tokenB", EventLatency_Normal, EventPersistence_Normal, 1, {}}), true); + + offlineStorage->DeleteRecords({{"tenant_token", "tokenA"}}); + + TestRecordConsumer consumer; + EXPECT_THAT(offlineStorage->GetAndReserveRecords(consumer, 100000), true); + ASSERT_THAT(consumer.records.size(), 1); + EXPECT_THAT(consumer.records[0].id, StrEq("guid2")); +} + +TEST_F(OfflineStorageTests_SQLite, DeleteRecordsFailsClosedOnUnrecognizedColumn) +{ + initializeStorage(); + ASSERT_THAT(offlineStorage->StoreRecord({"guid1", "tokenA", EventLatency_Normal, EventPersistence_Normal, 1, {}}), true); + ASSERT_THAT(offlineStorage->StoreRecord({"guid2", "tokenB", EventLatency_Normal, EventPersistence_Normal, 1, {}}), true); + + // An unrecognized column cannot be honored. Like MemoryStorage's matcher, + // the delete must fail closed (remove nothing) rather than drop the column + // and run a looser predicate. + offlineStorage->DeleteRecords({{"not_a_column", "x"}}); + offlineStorage->DeleteRecords({{"tenant_token", "tokenA"}, {"not_a_column", "x"}}); + + TestRecordConsumer consumer; + EXPECT_THAT(offlineStorage->GetAndReserveRecords(consumer, 100000), true); + ASSERT_THAT(consumer.records.size(), 2); +} + TEST_F(OfflineStorageTests_SQLite, ReservedRecordsAreReleasedAfterTimeout) { initializeStorage(); diff --git a/tests/unittests/UnitTests.vcxproj b/tests/unittests/UnitTests.vcxproj index a7412e484..faf465e97 100644 --- a/tests/unittests/UnitTests.vcxproj +++ b/tests/unittests/UnitTests.vcxproj @@ -437,6 +437,7 @@ + diff --git a/tests/unittests/UnitTests.vcxproj.filters b/tests/unittests/UnitTests.vcxproj.filters index 86dbf15c4..6a1476519 100644 --- a/tests/unittests/UnitTests.vcxproj.filters +++ b/tests/unittests/UnitTests.vcxproj.filters @@ -4,6 +4,7 @@ +