1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-25 06:31:14 +02:00

try to calculate character width

This commit is contained in:
Philipp Otterbein 2024-12-15 02:29:56 +01:00 committed by Jörg Thalheim
parent 6a23803066
commit 92e30955b9
7 changed files with 1636 additions and 29 deletions

View file

@ -356,6 +356,7 @@
''^src/libutil/util\.cc$'' ''^src/libutil/util\.cc$''
''^src/libutil/util\.hh$'' ''^src/libutil/util\.hh$''
''^src/libutil/variant-wrapper\.hh$'' ''^src/libutil/variant-wrapper\.hh$''
''^src/libutil/widecharwidth/widechar_width\.h$'' # vendored source
''^src/libutil/windows/file-descriptor\.cc$'' ''^src/libutil/windows/file-descriptor\.cc$''
''^src/libutil/windows/file-path\.cc$'' ''^src/libutil/windows/file-path\.cc$''
''^src/libutil/windows/processes\.cc$'' ''^src/libutil/windows/processes\.cc$''

View file

@ -55,6 +55,10 @@ TEST(filterANSIEscapes, utf8)
ASSERT_EQ(filterANSIEscapes("fóóbär", true, 3), "fóó"); ASSERT_EQ(filterANSIEscapes("fóóbär", true, 3), "fóó");
ASSERT_EQ(filterANSIEscapes("f€€bär", true, 4), "f€€b"); ASSERT_EQ(filterANSIEscapes("f€€bär", true, 4), "f€€b");
ASSERT_EQ(filterANSIEscapes("f𐍈𐍈bär", true, 4), "f𐍈𐍈b"); ASSERT_EQ(filterANSIEscapes("f𐍈𐍈bär", true, 4), "f𐍈𐍈b");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 6), "f🔍bar");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 3), "f🔍");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 2), "f");
ASSERT_EQ(filterANSIEscapes("foo\u0301", true, 3), "foó");
} }
TEST(filterANSIEscapes, osc8) TEST(filterANSIEscapes, osc8)

View file

@ -108,6 +108,8 @@ deps_private += cpuid
nlohmann_json = dependency('nlohmann_json', version : '>= 3.9') nlohmann_json = dependency('nlohmann_json', version : '>= 3.9')
deps_public += nlohmann_json deps_public += nlohmann_json
cxx = meson.get_compiler('cpp')
config_h = configure_file( config_h = configure_file(
configuration : configdata, configuration : configdata,
output : 'config-util.hh', output : 'config-util.hh',
@ -168,6 +170,10 @@ sources = files(
) )
include_dirs = [include_directories('.')] include_dirs = [include_directories('.')]
if not cxx.has_header('widechar_width.h', required : false)
# use vendored widechar_width.h
include_dirs += include_directories('./widecharwidth')
endif
headers = [config_h] + files( headers = [config_h] + files(
'abstract-setting-to-json.hh', 'abstract-setting-to-json.hh',

View file

@ -29,6 +29,7 @@ mkMesonLibrary (finalAttrs: {
./nix-meson-build-support ./nix-meson-build-support
../../.version ../../.version
./.version ./.version
./widecharwidth
./meson.build ./meson.build
./meson.options ./meson.options
./linux/meson.build ./linux/meson.build

View file

@ -11,6 +11,53 @@
# include <sys/ioctl.h> # include <sys/ioctl.h>
#endif #endif
#include <unistd.h> #include <unistd.h>
#include <widechar_width.h>
namespace {

/**
 * Decode the UTF-8 sequence at the start of `s` and estimate how many
 * terminal columns it occupies.
 *
 * @param s Text whose leading character is examined. Only the first
 *          sequence (at most 4 bytes) is inspected.
 * @return `{width, bytes}` where `bytes` is the number of bytes the caller
 *         should consume and `width` is the estimated column count:
 *         - empty input: `{0, 0}`;
 *         - invalid start byte: `{1, 1}`;
 *         - sequence truncated or broken at continuation byte `i`: `{i, i}`,
 *           i.e. the bytes seen so far are counted as one column each;
 *         - decoded value not below the limit for its length: `{bytes, bytes}`;
 *         - otherwise the result of `widechar_wcwidth()`, with its special
 *           (negative) codes mapped as described in the body.
 */
inline std::pair<int, size_t> charWidthUTF8Helper(std::string_view s)
{
    // Indexing s[0] on an empty view is undefined behaviour, so bail out first.
    if (s.empty())
        return {0, 0};

    // Go through unsigned char so a byte >= 0x80 is not sign-extended where
    // plain `char` is signed.
    uint32_t ch = static_cast<unsigned char>(s[0]);
    size_t bytes = 1;
    uint32_t max = 1U << 7; // exclusive upper bound for a value of this length

    if ((ch & 0x80U) == 0U) {
        // Single-byte (ASCII) character: nothing to strip.
    } else if ((ch & 0xe0U) == 0xc0U) {
        ch &= 0x1fU;
        bytes = 2;
        max = 1U << 11;
    } else if ((ch & 0xf0U) == 0xe0U) {
        ch &= 0x0fU;
        bytes = 3;
        max = 1U << 16;
    } else if ((ch & 0xf8U) == 0xf0U) {
        ch &= 0x07U;
        bytes = 4;
        max = 0x110000U;
    } else {
        return {static_cast<int>(bytes), bytes}; // invalid UTF-8 start byte
    }

    // Fold in the continuation bytes (each must look like 10xxxxxx).
    for (size_t i = 1; i < bytes; i++) {
        if (i < s.size() && (s[i] & 0xc0) == 0x80) {
            ch = (ch << 6) | (s[i] & 0x3f);
        } else {
            // Invalid UTF-8 encoding; assume one column per byte consumed so far.
            return {static_cast<int>(i), i};
        }
    }

    // Fallback of one column per byte when the decoded value is out of range.
    // Given the payload bit counts above, `ch < max` can only fail for 4-byte
    // sequences that encode a value above U+10FFFF; overlong encodings are
    // NOT detected by this check and are looked up like any other value.
    int width = static_cast<int>(bytes);
    if (ch < max) {
        width = widechar_wcwidth(ch);
        if (width == widechar_ambiguous) {
            width = 1; // just a guess...
        } else if (width == widechar_widened_in_9) {
            width = 2;
        } else if (width < 0) {
            // Any other special (negative) code from widechar_width.h is
            // treated as occupying no columns.
            width = 0;
        }
    }

    return {width, bytes};
}

}
namespace nix { namespace nix {
@ -30,7 +77,7 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
size_t w = 0; size_t w = 0;
auto i = s.begin(); auto i = s.begin();
while (w < (size_t) width && i != s.end()) { while (i != s.end()) {
if (*i == '\e') { if (*i == '\e') {
std::string e; std::string e;
@ -61,10 +108,12 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
} }
else if (*i == '\t') { else if (*i == '\t') {
i++; t += ' '; w++; do {
while (w < (size_t) width && w % 8) { if (++w > (size_t) width)
t += ' '; w++; return t;
} t += ' ';
} while (w % 8);
i++;
} }
else if (*i == '\r' || *i == '\a') else if (*i == '\r' || *i == '\a')
@ -72,35 +121,18 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
i++; i++;
else { else {
w++; auto [chWidth, bytes] = charWidthUTF8Helper({i, s.end()});
// Copy one UTF-8 character. w += chWidth;
if ((*i & 0xe0) == 0xc0) { if (w > (size_t) width) {
t += *i++; break;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++; }
} else if ((*i & 0xf0) == 0xe0) { t += {i, i + bytes};
t += *i++; i += bytes;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
} else if ((*i & 0xf8) == 0xf0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
}
} else
t += *i++;
} }
} }
return t; return t;
} }
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
static Sync<std::pair<unsigned short, unsigned short>> windowSize{{0, 0}}; static Sync<std::pair<unsigned short, unsigned short>> windowSize{{0, 0}};

View file

@ -0,0 +1,4 @@
widecharwidth - wcwidth implementation
Written in 2018 by ridiculous_fish
To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.

File diff suppressed because it is too large Load diff