Skip to content

Commit

Permalink
Merge pull request #1869 from berryzplus/feature/fix_bufferoverflow_c…
Browse files Browse the repository at this point in the history
…utf7

書き込み範囲のチェック漏れ対策(CUtf7)
  • Loading branch information
berryzplus committed Dec 14, 2022
2 parents 3be74b5 + ce00e77 commit 333f2c1
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 44 deletions.
3 changes: 1 addition & 2 deletions sakura_core/charset/CESI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,7 @@ void CESI::GetEncodingInfo_eucjp( const char* pS, const int nLen )
*/
void CESI::GetEncodingInfo_utf7( const char* pS, const int nLen )
{
const char *pr, *pr_end;
char *pr_next;
const char *pr, *pr_end, *pr_next;
int npoints, nlen_setb;
bool berror;

Expand Down
69 changes: 38 additions & 31 deletions sakura_core/charset/CUtf7.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@

#include "StdAfx.h"
#include "CUtf7.h"

#include <string_view>

#include "charset/charcode.h"
#include "charset/codechecker.h"
#include "convert/convert_util2.h"
Expand Down Expand Up @@ -79,31 +82,36 @@ int CUtf7::_Utf7SetBToUni_block( const char* pSrc, const int nSrcLen, wchar_t* p

int CUtf7::Utf7ToUni( const char* pSrc, const int nSrcLen, wchar_t* pDst, bool* pbError )
{
const char *pr, *pr_end;
char *pr_next;
wchar_t *pw;
int nblocklen=0;
bool berror_tmp, berror=false;
bool berror = false;

pr = pSrc;
pr_end = pSrc + nSrcLen;
pw = pDst;
std::string_view src(pSrc, nSrcLen);

auto pr = src.cbegin();
auto pr_end = src.cend();
auto pr_next = src.begin();

auto* pw = pDst;

while( pr_next < pr_end && *pr )
{
bool berror_tmp = false;

do{
// UTF-7 Set D 部分のチェック
nblocklen = CheckUtf7DPart( pr, pr_end-pr, &pr_next, &berror_tmp );
if( berror_tmp == true ){
auto pr_next2 = &*pr_next;
int nblocklen = CheckUtf7DPart( &*pr, pr_end - pr, &pr_next2, &berror_tmp );
if( berror_tmp ){
berror = true;
}
pw += _Utf7SetDToUni_block( pr, nblocklen, pw );

pr = pr_next; // 次の読み込み位置を取得
if( pr_next >= pr_end ){
pr_next += pr_next2 - &*pr_next;
pw += _Utf7SetDToUni_block( &*pr, nblocklen, pw );
if( pr_end <= pr_next || !*pr_next ){
break;
}
pr = pr_next; // 次の読み込み位置を取得

// UTF-7 Set B 部分のチェック
nblocklen = CheckUtf7BPart( pr, pr_end-pr, &pr_next, &berror_tmp, UC_LOOSE );
nblocklen = CheckUtf7BPart( &*pr, pr_end - pr, &pr_next2, &berror_tmp, UC_LOOSE );
pr_next += pr_next2 - &*pr_next;
{
// エラーがあってもできるところまでデコード
if( berror_tmp ){
Expand All @@ -114,14 +122,14 @@ int CUtf7::Utf7ToUni( const char* pSrc, const int nSrcLen, wchar_t* pDst, bool*
*pw = L'+';
++pw;
}else{
pw += _Utf7SetBToUni_block( pr, nblocklen, pw, &berror_tmp );
if( berror_tmp != false ){
pw += _Utf7SetBToUni_block( &*pr, nblocklen, pw, &berror_tmp );
if( berror_tmp ){
berror = true;
}
}
}
pr = pr_next; // 次の読み込み位置を取得
}while( pr_next < pr_end );
}

if( pbError ){
*pbError = berror;
Expand All @@ -135,7 +143,7 @@ int CUtf7::Utf7ToUni( const char* pSrc, const int nSrcLen, wchar_t* pDst, bool*
EConvertResult CUtf7::UTF7ToUnicode( const CMemory& cSrc, CNativeW* pDstMem )
{
// エラー状態:
bool bError;
bool bError = false;

// データ取得
int nDataLen = cSrc.GetRawLength();
Expand Down Expand Up @@ -207,7 +215,7 @@ int CUtf7::_UniToUtf7SetB_block( const wchar_t* pSrc, const int nSrcLen, char* p
return pw - pDst;
}

int CUtf7::UniToUtf7( const wchar_t* pSrc, const int nSrcLen, char* pDst )
int CUtf7::UniToUtf7( const wchar_t* pSrc, const int nSrcLen, char* pDst, int nDstLen )
{
const wchar_t *pr, *pr_base;
const wchar_t* pr_end;
Expand All @@ -229,10 +237,12 @@ int CUtf7::UniToUtf7( const wchar_t* pSrc, const int nSrcLen, char* pDst )

if( *pr == L'+' ){
// '+' → "+-"
pw[0] = '+';
pw[1] = '-';
if( nDstLen < pw + 2 - pDst ){
break;
}
*(pw++) = '+';
*(pw++) = '-';
++pr;
pw += 2;
}else{
for( ; pr < pr_end; ++pr ){
if( IsUtf7SetD(*pr) ){
Expand All @@ -258,18 +268,15 @@ EConvertResult CUtf7::UnicodeToUTF7( const CNativeW& cSrc, CMemory* pDstMem )

// 出力先バッファの確保
// 最大で、変換元のデータ長の5倍。
char *pDst = new (std::nothrow) char[ nSrcLen * 5 + 1 ]; // * → +ACo-
if( pDst == NULL ){
return RESULT_FAILURE;
}
int nDstLen = nSrcLen * 5;
std::string dst( nDstLen, char() ); // * → +ACo-
auto pDst = dst.data();

// 変換
int nDstLen = UniToUtf7( pSrc, nSrcLen, pDst );
nDstLen = UniToUtf7( pSrc, nSrcLen, pDst, nDstLen );

// pMem にデータをセット
pDstMem->SetRawDataHoldBuffer( pDst, nDstLen );

delete [] pDst;

return RESULT_COMPLETE;
}
2 changes: 1 addition & 1 deletion sakura_core/charset/CUtf7.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@ class CUtf7 : public CCodeBase{

static int _UniToUtf7SetD_block( const wchar_t* pSrc, const int nSrcLen, char* pDst );
static int _UniToUtf7SetB_block( const wchar_t* pSrc, const int nSrcLen, char* pDst );
static int UniToUtf7( const wchar_t* pSrc, const int nSrcLen, char* pDst );
static int UniToUtf7( const wchar_t* pSrc, const int nSrcLen, char* pDst, int nDstLen );
};
#endif /* SAKURA_CUTF7_55498766_1C8A_416B_9F39_88D3D83B8B65_H_ */
16 changes: 8 additions & 8 deletions sakura_core/charset/codechecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,7 @@ EndFunc:;
戻り値と ppNextChar に格納されるポインタは使えない。
1つ以上のエラーが見つかれば候補から外れるのでそういう適当な仕様に。
*/
int CheckUtf7DPart( const char *pS, const int nLen, char **ppNextChar, bool *pbError )
int CheckUtf7DPart( const char *pS, size_t nLen, const char **ppNextChar, bool *pbError )
{
const char *pr, *pr_end;
bool berror = false;
Expand Down Expand Up @@ -921,11 +921,11 @@ int CheckUtf7DPart( const char *pS, const int nLen, char **ppNextChar, bool *pbE

if( pr < pr_end ){
// '+' をスキップ
*ppNextChar = const_cast<char*>(pr) + 1;
*ppNextChar = pr + 1;
}else{
*ppNextChar = const_cast<char*>(pr);
*ppNextChar = pr;
}
return pr - pS;
return static_cast<int>( pr - pS );
}

/*!
Expand All @@ -937,7 +937,7 @@ int CheckUtf7DPart( const char *pS, const int nLen, char **ppNextChar, bool *pbE
@note この関数の前に CheckUtf7DPart() が実行される必要がある。
*/
int CheckUtf7BPart( const char *pS, const int nLen, char **ppNextChar, bool *pbError, const int nOption, bool* pbNoAddPoint )
int CheckUtf7BPart( const char *pS, size_t nLen, const char **ppNextChar, bool *pbError, const int nOption, bool* pbNoAddPoint )
{
const char *pr, *pr_end;
bool berror_found, bminus_found;
Expand Down Expand Up @@ -969,15 +969,15 @@ int CheckUtf7BPart( const char *pS, const int nLen, char **ppNextChar, bool *pbE
// セットBの文字でなくなるまでループ
if( !IsBase64(*pr) ){
if( *pr == '-' ){
bminus_found= true;
bminus_found = true;
}else{
bminus_found = false;
}
break;
}
}

nchecklen = pr - pS;
nchecklen = static_cast<int>( pr - pS );

// 保護コード
if( nchecklen < 1 ){
Expand Down Expand Up @@ -1065,7 +1065,7 @@ EndFunc:;

if( (berror_found == false || UC_LOOSE == (nOption & UC_LOOSE)) && (pr < pr_end && bminus_found == true) ){
// '-' をスキップ。
*ppNextChar = const_cast<char*>(pr) + 1;
*ppNextChar = pr + 1;
}else{
*ppNextChar = const_cast<char*>(pr);

Expand Down
4 changes: 2 additions & 2 deletions sakura_core/charset/codechecker.h
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,6 @@ int CheckUtf8Char( const char*, const int, ECharSet*, const bool bAllow4byteCode
int CheckUtf8Char2( const char*, const int, ECharSet*, const bool bAllow4byteCode, const int nOption );
int CheckCesu8Char( const char*, const int, ECharSet*, const int nOption );
// UTF-7 フォーマットチェック
int CheckUtf7DPart( const char*, const int, char **ppNextChar, bool *pbError );
int CheckUtf7BPart( const char*, const int, char **ppNextChar, bool *pbError, const int nOption, bool *pbNoAddPoint = NULL );
int CheckUtf7DPart( const char* pS, size_t nLen, const char **ppNextChar, bool *pbError );
int CheckUtf7BPart( const char* pS, size_t nLen, const char **ppNextChar, bool *pbError, const int nOption, bool *pbNoAddPoint = NULL );
#endif /* SAKURA_CODECHECKER_62A18A31_2ECD_47B6_AEE1_38EDDAD3FF2B_H_ */
51 changes: 51 additions & 0 deletions tests/unittests/test-ccodebase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,57 @@ TEST(CCodeBase, codeUtf8_OracleImplementation)
ASSERT_TRUE( bComplete2_2 );
}

/*!
 * @brief Character-encoding conversion test (UTF-7).
 *
 * Each section feeds a UTF-7 byte sequence to CodeToUnicode(), checks the
 * resulting wide string, then feeds that result to UnicodeToCode() and checks
 * that the produced bytes match the original UTF-7 text.
 *
 * The byte comparison uses the produced length, so the length is validated
 * first: memcmp() with a length of 0 always returns 0 (an empty result would
 * pass unnoticed), and a result longer than the expected literal would make
 * memcmp() read past the end of the literal.
 */
TEST(CCodeBase, codeUtf7)
{
	const auto eCodeType = CODE_UTF7;
	auto pCodeBase = CCodeFactory::CreateCodeBase(eCodeType);

	// 7-bit ASCII range (per the UTF-7 specification)
	constexpr const auto& mbsAscii = "+AAEAAgADAAQABQAGAAcACA-\t\n+AAsADA-\r+AA4ADwAQABEAEgATABQAFQAWABcAGAAZABoAGwAcAB0AHgAf- +ACEAIgAjACQAJQAm-'()+ACoAKw-,-./0123456789:+ADsAPAA9AD4-?+AEA-ABCDEFGHIJKLMNOPQRSTUVWXYZ+AFsAXABdAF4AXwBg-abcdefghijklmnopqrstuvwxyz+AHsAfAB9AH4Afw-";
	constexpr const auto& wcsAscii = L"\x01\x02\x03\x04\x05\x06\a\b\t\n\v\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F";

	// UTF-7 bytes -> wide string. The view length is _countof(), i.e. it includes the literal's terminating NUL.
	bool bComplete1_1 = false;
	auto encoded1 = pCodeBase->CodeToUnicode(BinarySequenceView(reinterpret_cast<const std::byte*>(mbsAscii), _countof(mbsAscii)), &bComplete1_1);
	EXPECT_STREQ(wcsAscii, encoded1.GetStringPtr());
	EXPECT_TRUE(bComplete1_1);

	// Wide string -> UTF-7 bytes; must reproduce the original text.
	bool bComplete1_2 = false;
	auto decoded1 = pCodeBase->UnicodeToCode(encoded1, &bComplete1_2);
	// Guard the memcmp below: reject an empty result and an over-long result.
	ASSERT_FALSE(decoded1.empty());
	ASSERT_LE(decoded1.size(), _countof(mbsAscii));
	EXPECT_EQ(0, memcmp(mbsAscii, decoded1.data(), decoded1.size()));
	EXPECT_TRUE(bComplete1_2);

	// Kana / kanji conversion (per the UTF-7 specification)
	constexpr const auto& wcsKanaKanji = L"カナかなカナ漢字";
	constexpr const auto& mbsKanaKanji = "+/3b/hTBLMGowqzDKbyJbVw-";

	bool bComplete2_1 = false;
	auto encoded2 = pCodeBase->CodeToUnicode(BinarySequenceView(reinterpret_cast<const std::byte*>(mbsKanaKanji), _countof(mbsKanaKanji)), &bComplete2_1);
	ASSERT_STREQ(wcsKanaKanji, encoded2.GetStringPtr());
	ASSERT_TRUE(bComplete2_1);

	bool bComplete2_2 = false;
	auto decoded2 = pCodeBase->UnicodeToCode(encoded2, &bComplete2_2);
	// Guard the memcmp below: reject an empty result and an over-long result.
	ASSERT_FALSE(decoded2.empty());
	ASSERT_LE(decoded2.size(), _countof(mbsKanaKanji));
	ASSERT_EQ(0, memcmp(mbsKanaKanji, decoded2.data(), decoded2.size()));
	ASSERT_TRUE(bComplete2_2);

	// UTF-7 specification: a literal '+' is written as "+-"
	constexpr const auto& wcsPlusPlus = L"C++";
	constexpr const auto& mbsPlusPlus = "C+-+-";

	bool bComplete5_1 = false;
	auto encoded5 = pCodeBase->CodeToUnicode(BinarySequenceView(reinterpret_cast<const std::byte*>(mbsPlusPlus), _countof(mbsPlusPlus)), &bComplete5_1);
	ASSERT_STREQ(wcsPlusPlus, encoded5.GetStringPtr());
	ASSERT_TRUE(bComplete5_1);

	bool bComplete5_2 = false;
	auto decoded5 = pCodeBase->UnicodeToCode(encoded5, &bComplete5_2);
	// Guard the memcmp below: reject an empty result and an over-long result.
	ASSERT_FALSE(decoded5.empty());
	ASSERT_LE(decoded5.size(), _countof(mbsPlusPlus));
	ASSERT_EQ(0, memcmp(mbsPlusPlus, decoded5.data(), decoded5.size()));
	ASSERT_TRUE(bComplete5_2);
}

/*!
* @brief 文字コード変換のテスト
*/
Expand Down

0 comments on commit 333f2c1

Please sign in to comment.