From 6ce4d22a46e87305e9136cb46a6ca21ac5df2481 Mon Sep 17 00:00:00 2001 From: wumo Date: Thu, 12 Feb 2026 14:27:54 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=93=88=E5=B8=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- devel/210_18.md | 34 ++++ goldfish/liii/hashlib.scm | 7 +- src/goldfish.hpp | 251 ++++++++++++++++++++------- tests/goldfish/liii/hashlib-test.scm | 17 +- 4 files changed, 247 insertions(+), 62 deletions(-) create mode 100644 devel/210_18.md diff --git a/devel/210_18.md b/devel/210_18.md new file mode 100644 index 00000000..bcee79bf --- /dev/null +++ b/devel/210_18.md @@ -0,0 +1,34 @@ +# [210_17] `(liii hashlib)` 文件哈希能力补充 + +## 任务相关的代码文件 +- `src/goldfish.hpp` +- `goldfish/liii/hashlib.scm` +- `tests/goldfish/liii/hashlib-test.scm` + +## 如何测试 + +```shell +xmake f -c +xmake +./bin/goldfish tests/goldfish/liii/hashlib-test.scm +``` + +## 2026/2/12 新增文件哈希 + +### What + +1. 在 `(liii hashlib)` 中新增文件哈希接口: + - `md5-file` + - `sha1-file` + - `sha256-file` +2. 在 C++ glue 层新增对应底层函数: + - `g_md5-file` + - `g_sha1-file` + - `g_sha256-file` +5. 更新 `hashlib-test.scm`,补充文件哈希测试(普通内容与空文件)。 + +### Why + +1. 提供对文件内容直接计算哈希的能力,避免业务侧自行读文件再哈希。 +2. 底层符号名与用途对齐,避免将“字符串哈希”和“文件哈希”语义混淆。 + diff --git a/goldfish/liii/hashlib.scm b/goldfish/liii/hashlib.scm index f9106908..d98a58db 100644 --- a/goldfish/liii/hashlib.scm +++ b/goldfish/liii/hashlib.scm @@ -14,12 +14,17 @@ ; under the License. 
; (define-library (liii hashlib) - (export md5 sha1 sha256) + (export md5 sha1 sha256 + md5-file sha1-file sha256-file) (begin (define (md5 str) (g_md5 str)) (define (sha1 str) (g_sha1 str)) (define (sha256 str) (g_sha256 str)) + (define (md5-file path) (g_md5-file path)) + (define (sha1-file path) (g_sha1-file path)) + (define (sha256-file path) (g_sha256-file path)) + ) ; end of begin ) ; end of define-library diff --git a/src/goldfish.hpp b/src/goldfish.hpp index 5d46c7cc..7b72f6e4 100644 --- a/src/goldfish.hpp +++ b/src/goldfish.hpp @@ -906,83 +906,217 @@ glue_liii_uuid (s7_scheme* sc) { glue_uuid4 (sc); } -static s7_pointer f_md5(s7_scheme* sc, s7_pointer args) { - const char* searchString = s7_string(s7_car(args)); - tb_size_t len = tb_strlen(searchString); - tb_byte_t ob[16]; - tb_char_t hex_output[33] = {0}; - tb_md5_t md5; - tb_md5_init(&md5, 0); - if (len > 0) { - tb_md5_spak(&md5, (tb_byte_t const*)searchString, len); +inline void +hash_bytes_to_hex (const tb_byte_t* bytes, tb_size_t length, tb_char_t* hex_output) { + for (tb_size_t i= 0; i < length; ++i) { + tb_snprintf (hex_output + (i << 1), 3, "%02x", bytes[i]); + } + hex_output[length << 1]= '\0'; +} + +static bool +md5_file_to_hex (const char* path, tb_char_t* hex_output) { + if (!path) { + return false; + } + + tb_file_ref_t file= tb_file_init (path, TB_FILE_MODE_RO); + if (file == tb_null) { + return false; + } + + tb_md5_t md5; + tb_md5_init (&md5, 0); + + tb_size_t size = tb_file_size (file); + tb_size_t offset= 0; + tb_byte_t buffer[4096]; + while (offset < size) { + tb_size_t want = ((size - offset) > sizeof (buffer)) ? 
sizeof (buffer) : (size - offset); + tb_long_t real_size= tb_file_read (file, buffer, want); /* signed: tb_file_read reports failure as -1 */ + if (real_size <= 0) { /* <0: read error, 0: file ended before the expected size */ + tb_file_exit (file); + return false; } - tb_md5_exit(&md5, ob, 16); - for (tb_size_t i = 0; i < 16; ++i) { - tb_snprintf(hex_output + (i << 1), 3, "%02x", ob[i]); + tb_md5_spak (&md5, buffer, (tb_size_t) real_size); + offset += (tb_size_t) real_size; + } + + tb_file_exit (file); + + tb_byte_t digest[16]; + tb_md5_exit (&md5, digest, sizeof (digest)); + hash_bytes_to_hex (digest, sizeof (digest), hex_output); + return true; +} + +static bool +sha_file_to_hex (const char* path, tb_size_t mode, tb_size_t digest_size, tb_char_t* hex_output) { + if (!path) { + return false; + } + + tb_file_ref_t file= tb_file_init (path, TB_FILE_MODE_RO); + if (file == tb_null) { + return false; + } + + tb_sha_t sha; + tb_sha_init (&sha, mode); + + tb_size_t size = tb_file_size (file); + tb_size_t offset= 0; + tb_byte_t buffer[4096]; + while (offset < size) { + tb_size_t want = ((size - offset) > sizeof (buffer)) ?
sizeof (buffer) : (size - offset); + tb_long_t real_size= tb_file_read (file, buffer, want); /* signed: tb_file_read reports failure as -1 */ + if (real_size <= 0) { /* <0: read error, 0: file ended before the expected size */ + tb_file_exit (file); + return false; } - return s7_make_string(sc, hex_output); + tb_sha_spak (&sha, buffer, (tb_size_t) real_size); + offset += (tb_size_t) real_size; + } + + tb_file_exit (file); + + tb_byte_t digest[32]; + tb_sha_exit (&sha, digest, digest_size); + hash_bytes_to_hex (digest, digest_size, hex_output); + return true; } -inline void -glue_md5(s7_scheme* sc) { - const char* name = "g_md5"; - const char* desc = "(g_md5 str) => string"; - glue_define(sc, name, desc, f_md5, 1, 0); +static s7_pointer +f_md5 (s7_scheme* sc, s7_pointer args) { + const char* search_string= s7_string (s7_car (args)); + tb_size_t len = tb_strlen (search_string); + tb_byte_t digest[16]; + tb_char_t hex_output[33]= {0}; + tb_md5_t md5; + + tb_md5_init (&md5, 0); + if (len > 0) { + tb_md5_spak (&md5, (tb_byte_t const*) search_string, len); + } + tb_md5_exit (&md5, digest, sizeof (digest)); + hash_bytes_to_hex (digest, sizeof (digest), hex_output); + return s7_make_string (sc, hex_output); } -static s7_pointer f_sha1(s7_scheme* sc, s7_pointer args) { - const char* searchString = s7_string(s7_car(args)); - tb_size_t len = tb_strlen(searchString); - tb_byte_t ob[20]; // SHA1 produces 20 bytes - tb_char_t hex_output[41] = {0}; // 20 bytes * 2 hex digits per byte + null terminator - tb_sha_t sha; - tb_sha_init(&sha, 160); // TB_SHA_MODE_SHA1_160 = 160 - if (len > 0) { - tb_sha_spak(&sha, (tb_byte_t const*)searchString, len); - } - tb_sha_exit(&sha, ob, 20); - for (tb_size_t i = 0; i < 20; ++i) { - tb_snprintf(hex_output + (i << 1), 3, "%02x", ob[i]); - } - return s7_make_string(sc, hex_output); +inline void +glue_md5 (s7_scheme* sc) { + const char* name= "g_md5"; + const char* desc= "(g_md5 str) => string"; + glue_define (sc, name, desc, f_md5, 1, 0); +} + +static s7_pointer +f_md5_file (s7_scheme* sc, s7_pointer args) { + const char* path= s7_string (s7_car (args)); + tb_char_t
hex_output[33]= {0}; + if (!md5_file_to_hex (path, hex_output)) { + return s7_make_boolean (sc, false); + } + return s7_make_string (sc, hex_output); } inline void -glue_sha1(s7_scheme* sc) { - const char* name = "g_sha1"; - const char* desc = "(g_sha1 str) => string"; - glue_define(sc, name, desc, f_sha1, 1, 0); -} - -static s7_pointer f_sha256(s7_scheme* sc, s7_pointer args) { - const char* searchString = s7_string(s7_car(args)); - tb_size_t len = tb_strlen(searchString); - tb_byte_t ob[32]; // SHA256 produces 32 bytes - tb_char_t hex_output[65] = {0}; // 32 bytes * 2 hex digits per byte + null terminator - tb_sha_t sha; - tb_sha_init(&sha, 256); // TB_SHA_MODE_SHA2_256 = 256 - if (len > 0) { - tb_sha_spak(&sha, (tb_byte_t const*)searchString, len); - } - tb_sha_exit(&sha, ob, 32); - for (tb_size_t i = 0; i < 32; ++i) { - tb_snprintf(hex_output + (i << 1), 3, "%02x", ob[i]); - } - return s7_make_string(sc, hex_output); +glue_md5_file (s7_scheme* sc) { + const char* name= "g_md5-file"; + const char* desc= "(g_md5-file path) => string|#f"; + glue_define (sc, name, desc, f_md5_file, 1, 0); +} + +static s7_pointer +f_sha1 (s7_scheme* sc, s7_pointer args) { + const char* search_string= s7_string (s7_car (args)); + tb_size_t len = tb_strlen (search_string); + tb_byte_t digest[20]; + tb_char_t hex_output[41]= {0}; + tb_sha_t sha; + + tb_sha_init (&sha, 160); + if (len > 0) { + tb_sha_spak (&sha, (tb_byte_t const*) search_string, len); + } + tb_sha_exit (&sha, digest, sizeof (digest)); + hash_bytes_to_hex (digest, sizeof (digest), hex_output); + return s7_make_string (sc, hex_output); } inline void -glue_sha256(s7_scheme* sc) { - const char* name = "g_sha256"; - const char* desc = "(g_sha256 str) => string"; - glue_define(sc, name, desc, f_sha256, 1, 0); +glue_sha1 (s7_scheme* sc) { + const char* name= "g_sha1"; + const char* desc= "(g_sha1 str) => string"; + glue_define (sc, name, desc, f_sha1, 1, 0); +} + +static s7_pointer +f_sha1_file (s7_scheme* sc, s7_pointer args) 
{ + const char* path= s7_string (s7_car (args)); + tb_char_t hex_output[41]= {0}; + if (!sha_file_to_hex (path, 160, 20, hex_output)) { + return s7_make_boolean (sc, false); + } + return s7_make_string (sc, hex_output); +} + +inline void +glue_sha1_file (s7_scheme* sc) { + const char* name= "g_sha1-file"; + const char* desc= "(g_sha1-file path) => string|#f"; + glue_define (sc, name, desc, f_sha1_file, 1, 0); +} + +static s7_pointer +f_sha256 (s7_scheme* sc, s7_pointer args) { + const char* search_string= s7_string (s7_car (args)); + tb_size_t len = tb_strlen (search_string); + tb_byte_t digest[32]; + tb_char_t hex_output[65]= {0}; + tb_sha_t sha; + + tb_sha_init (&sha, 256); + if (len > 0) { + tb_sha_spak (&sha, (tb_byte_t const*) search_string, len); + } + tb_sha_exit (&sha, digest, sizeof (digest)); + hash_bytes_to_hex (digest, sizeof (digest), hex_output); + return s7_make_string (sc, hex_output); +} + +inline void +glue_sha256 (s7_scheme* sc) { + const char* name= "g_sha256"; + const char* desc= "(g_sha256 str) => string"; + glue_define (sc, name, desc, f_sha256, 1, 0); +} + +static s7_pointer +f_sha256_file (s7_scheme* sc, s7_pointer args) { + const char* path= s7_string (s7_car (args)); + tb_char_t hex_output[65]= {0}; + if (!sha_file_to_hex (path, 256, 32, hex_output)) { + return s7_make_boolean (sc, false); + } + return s7_make_string (sc, hex_output); +} + +inline void +glue_sha256_file (s7_scheme* sc) { + const char* name= "g_sha256-file"; + const char* desc= "(g_sha256-file path) => string|#f"; + glue_define (sc, name, desc, f_sha256_file, 1, 0); } inline void glue_liii_hashlib (s7_scheme* sc) { glue_md5 (sc); + glue_md5_file (sc); glue_sha1 (sc); + glue_sha1_file (sc); glue_sha256 (sc); + glue_sha256_file (sc); } @@ -2102,4 +2236,3 @@ repl_for_community_edition (s7_scheme* sc, int argc, char** argv) { } } // namespace goldfish - diff --git a/tests/goldfish/liii/hashlib-test.scm b/tests/goldfish/liii/hashlib-test.scm index 4a826626..87a12fd3 100644 --- 
a/tests/goldfish/liii/hashlib-test.scm +++ b/tests/goldfish/liii/hashlib-test.scm @@ -14,7 +14,7 @@ ; under the License. ; -(import (liii check) (liii hashlib)) +(import (liii check) (liii hashlib) (liii path)) (check (md5 "") => "d41d8cd98f00b204e9800998ecf8427e") (check (md5 "hello") => "5d41402abc4b2a76b9719d911017c592") @@ -42,4 +42,17 @@ (check (sha256 "!@#$%^&*()") => "95ce789c5c9d18490972709838ca3a9719094bca3ac16332cfec0652b0236141") (check (sha256 "Hello") => "185f8db32271fe25f561a6fc938b2e264306ec304eda518007d1764826381969") -(check-report) \ No newline at end of file +(let ((tmp-file "tests/resources/hashlib-test-temp.txt") + (content "hello")) + (path-write-text tmp-file content) + (check (md5-file tmp-file) => (md5 content)) + (check (sha1-file tmp-file) => (sha1 content)) + (check (sha256-file tmp-file) => (sha256 content)) + + (path-write-text tmp-file "") + (check (md5-file tmp-file) => (md5 "")) + (check (sha1-file tmp-file) => (sha1 "")) + (check (sha256-file tmp-file) => (sha256 "")) + (delete-file tmp-file)) + +(check-report) From 5b71e5deda689c41e193eb46aa9ab3506c6712a9 Mon Sep 17 00:00:00 2001 From: wumo Date: Thu, 12 Feb 2026 14:28:22 +0800 Subject: [PATCH 2/5] wip --- devel/210_18.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devel/210_18.md b/devel/210_18.md index bcee79bf..014a2b2b 100644 --- a/devel/210_18.md +++ b/devel/210_18.md @@ -1,4 +1,4 @@ -# [210_17] `(liii hashlib)` 文件哈希能力补充 +# [210_18] `(liii hashlib)` 文件哈希能力补充 ## 任务相关的代码文件 - `src/goldfish.hpp` From e7292a728f6cd4aa06a4c77871f3ad3832ed10ca Mon Sep 17 00:00:00 2001 From: wumo Date: Thu, 12 Feb 2026 14:39:49 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E8=A1=A5=E5=85=85=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/goldfish/liii/hashlib-test.scm | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git 
a/tests/goldfish/liii/hashlib-test.scm b/tests/goldfish/liii/hashlib-test.scm index 87a12fd3..fb2914ba 100644 --- a/tests/goldfish/liii/hashlib-test.scm +++ b/tests/goldfish/liii/hashlib-test.scm @@ -14,7 +14,7 @@ ; under the License. ; -(import (liii check) (liii hashlib) (liii path)) +(import (liii check) (liii hashlib) (liii path) (liii os)) (check (md5 "") => "d41d8cd98f00b204e9800998ecf8427e") (check (md5 "hello") => "5d41402abc4b2a76b9719d911017c592") @@ -55,4 +55,23 @@ (check (sha256-file tmp-file) => (sha256 "")) (delete-file tmp-file)) +;; Large file (200MB) hash test +(let* ((large-url "http://ipv4.download.thinkbroadband.com/200MB.zip") + (large-file "tests/resources/hashlib-test-large-200MB.zip") + (download-cmd (string-append "curl -L --fail --retry 2 --connect-timeout 30 -o \"" + large-file + "\" \"" + large-url + "\"")) + (ret (os-call download-cmd))) + (check ret => 0) + (check (path-getsize large-file) => 209715200) + (check (md5-file large-file) => "3389a0b30e05ef6613ccbdae5d9ec0bd") + (check (sha1-file large-file) => "fd72443c217d301f8959b5e721f8f0b6fc5eb127") + (check (sha256-file large-file) => "d14b73150642f30d2342e6620fa537ea273a58b8b751fc5af8f4aabe809f8fc4") + (when (path-exists? large-file) + (delete-file large-file))) + + + (check-report) From 48b43092239eb5893face5954373a939f3218750 Mon Sep 17 00:00:00 2001 From: wumo Date: Thu, 12 Feb 2026 14:52:41 +0800 Subject: [PATCH 4/5] wip --- devel/210_18.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devel/210_18.md b/devel/210_18.md index 014a2b2b..734c4b1f 100644 --- a/devel/210_18.md +++ b/devel/210_18.md @@ -25,7 +25,7 @@ xmake - `g_md5-file` - `g_sha1-file` - `g_sha256-file` -5. 更新 `hashlib-test.scm`,补充文件哈希测试(普通内容与空文件)。 +3. 
更新 `hashlib-test.scm`,补充文件哈希测试(普通内容与空文件)。 ### Why From 2e8e0ec709e2f240858541d7d24a7e6ab7a9e84e Mon Sep 17 00:00:00 2001 From: wumo Date: Thu, 12 Feb 2026 14:53:29 +0800 Subject: [PATCH 5/5] wip --- devel/210_18.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devel/210_18.md b/devel/210_18.md index 734c4b1f..8029b76b 100644 --- a/devel/210_18.md +++ b/devel/210_18.md @@ -25,7 +25,7 @@ xmake - `g_md5-file` - `g_sha1-file` - `g_sha256-file` -3. 更新 `hashlib-test.scm`,补充文件哈希测试(普通内容与空文件)。 +3. 更新 `hashlib-test.scm`,补充文件哈希测试(普通内容、空文件与 200MB 文件)。 ### Why