Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions devel/210_18.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# [210_18] `(liii hashlib)` 文件哈希能力补充

## 任务相关的代码文件
- `src/goldfish.hpp`
- `goldfish/liii/hashlib.scm`
- `tests/goldfish/liii/hashlib-test.scm`

## 如何测试

```shell
xmake f -c
xmake
./bin/goldfish tests/goldfish/liii/hashlib-test.scm
```

## 2026/2/12 新增文件哈希

### What

1. 在 `(liii hashlib)` 中新增文件哈希接口:
   - `md5-file`
   - `sha1-file`
   - `sha256-file`
2. 在 C++ glue 层新增对应底层函数:
   - `g_md5-file`
   - `g_sha1-file`
   - `g_sha256-file`
3. 更新 `hashlib-test.scm`,补充文件哈希测试(普通内容、空文件与 200 MB 文件)。

### Why

1. 提供对文件内容直接计算哈希的能力,避免业务侧自行读文件再哈希。
2. 底层符号名与用途对齐,避免将“字符串哈希”和“文件哈希”语义混淆。

7 changes: 6 additions & 1 deletion goldfish/liii/hashlib.scm
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,17 @@
; under the License.
;
(define-library (liii hashlib)
  (export
    ; digests of in-memory strings
    md5 sha1 sha256
    ; digests of file contents
    md5-file sha1-file sha256-file)
  (begin

    ; String hashing: each procedure hands its argument to the
    ; correspondingly named g_* primitive and returns its result.
    (define (md5 str) (g_md5 str))
    (define (sha1 str) (g_sha1 str))
    (define (sha256 str) (g_sha256 str))

    ; File hashing: `path` names the file whose contents are hashed;
    ; the result is whatever the g_*-file primitive returns.
    (define (md5-file path) (g_md5-file path))
    (define (sha1-file path) (g_sha1-file path))
    (define (sha256-file path) (g_sha256-file path))

  ) ; end of begin
) ; end of define-library
251 changes: 192 additions & 59 deletions src/goldfish.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -906,83 +906,217 @@ glue_liii_uuid (s7_scheme* sc) {
glue_uuid4 (sc);
}

static s7_pointer f_md5(s7_scheme* sc, s7_pointer args) {
const char* searchString = s7_string(s7_car(args));
tb_size_t len = tb_strlen(searchString);
tb_byte_t ob[16];
tb_char_t hex_output[33] = {0};
tb_md5_t md5;
tb_md5_init(&md5, 0);
if (len > 0) {
tb_md5_spak(&md5, (tb_byte_t const*)searchString, len);
inline void
hash_bytes_to_hex (const tb_byte_t* bytes, tb_size_t length, tb_char_t* hex_output) {
for (tb_size_t i= 0; i < length; ++i) {
tb_snprintf (hex_output + (i << 1), 3, "%02x", bytes[i]);
}
hex_output[length << 1]= '\0';
}

static bool
md5_file_to_hex (const char* path, tb_char_t* hex_output) {
if (!path) {
return false;
}

tb_file_ref_t file= tb_file_init (path, TB_FILE_MODE_RO);
if (file == tb_null) {
return false;
}

tb_md5_t md5;
tb_md5_init (&md5, 0);

tb_size_t size = tb_file_size (file);
tb_size_t offset= 0;
tb_byte_t buffer[4096];
while (offset < size) {
tb_size_t want = ((size - offset) > sizeof (buffer)) ? sizeof (buffer) : (size - offset);
tb_size_t real_size= tb_file_read (file, buffer, want);
if (real_size == 0) {
tb_file_exit (file);
return false;
}
tb_md5_exit(&md5, ob, 16);
for (tb_size_t i = 0; i < 16; ++i) {
tb_snprintf(hex_output + (i << 1), 3, "%02x", ob[i]);
tb_md5_spak (&md5, buffer, real_size);
offset += real_size;
}

tb_file_exit (file);

tb_byte_t digest[16];
tb_md5_exit (&md5, digest, sizeof (digest));
hash_bytes_to_hex (digest, sizeof (digest), hex_output);
return true;
}

static bool
sha_file_to_hex (const char* path, tb_size_t mode, tb_size_t digest_size, tb_char_t* hex_output) {
if (!path) {
return false;
}

tb_file_ref_t file= tb_file_init (path, TB_FILE_MODE_RO);
if (file == tb_null) {
return false;
}

tb_sha_t sha;
tb_sha_init (&sha, mode);

tb_size_t size = tb_file_size (file);
tb_size_t offset= 0;
tb_byte_t buffer[4096];
while (offset < size) {
tb_size_t want = ((size - offset) > sizeof (buffer)) ? sizeof (buffer) : (size - offset);
tb_size_t real_size= tb_file_read (file, buffer, want);
if (real_size == 0) {
tb_file_exit (file);
return false;
}
return s7_make_string(sc, hex_output);
tb_sha_spak (&sha, buffer, real_size);
offset += real_size;
}

tb_file_exit (file);

tb_byte_t digest[32];
tb_sha_exit (&sha, digest, digest_size);
hash_bytes_to_hex (digest, digest_size, hex_output);
return true;
}

inline void
glue_md5(s7_scheme* sc) {
const char* name = "g_md5";
const char* desc = "(g_md5 str) => string";
glue_define(sc, name, desc, f_md5, 1, 0);
// Scheme primitive behind `g_md5`: returns the MD5 digest of a string as a
// 32-character lowercase hex string.
//
// args : argument list whose first element must be a string; any other type
//        raises a wrong-type-arg error instead of being read as string data.
//
// The string's stored length is used rather than a NUL-terminated scan, so
// strings containing embedded NUL characters are hashed in full.
static s7_pointer
f_md5 (s7_scheme* sc, s7_pointer args) {
  s7_pointer input= s7_car (args);
  if (!s7_is_string (input)) {
    return s7_wrong_type_arg_error (sc, "g_md5", 1, input, "a string");
  }

  tb_byte_t const* data= (tb_byte_t const*) s7_string (input);
  tb_size_t        len = (tb_size_t) s7_string_length (input);
  tb_byte_t        digest[16];
  tb_char_t        hex_output[33]= {0};
  tb_md5_t         md5;

  tb_md5_init (&md5, 0);
  if (len > 0) {
    tb_md5_spak (&md5, data, len);
  }
  tb_md5_exit (&md5, digest, sizeof (digest));
  hash_bytes_to_hex (digest, sizeof (digest), hex_output);
  return s7_make_string (sc, hex_output);
}

static s7_pointer f_sha1(s7_scheme* sc, s7_pointer args) {
const char* searchString = s7_string(s7_car(args));
tb_size_t len = tb_strlen(searchString);
tb_byte_t ob[20]; // SHA1 produces 20 bytes
tb_char_t hex_output[41] = {0}; // 20 bytes * 2 hex digits per byte + null terminator
tb_sha_t sha;
tb_sha_init(&sha, 160); // TB_SHA_MODE_SHA1_160 = 160
if (len > 0) {
tb_sha_spak(&sha, (tb_byte_t const*)searchString, len);
}
tb_sha_exit(&sha, ob, 20);
for (tb_size_t i = 0; i < 20; ++i) {
tb_snprintf(hex_output + (i << 1), 3, "%02x", ob[i]);
}
return s7_make_string(sc, hex_output);
// Expose f_md5 to the interpreter under the Scheme name `g_md5`.
// NOTE(review): the trailing 1, 0 are presumably the required and optional
// argument counts expected by glue_define -- confirm against its definition.
inline void
glue_md5 (s7_scheme* sc) {
  glue_define (sc, "g_md5", "(g_md5 str) => string", f_md5, 1, 0);
}

// Scheme primitive behind `g_md5-file`: hashes the contents of a file.
//
// args : argument list whose first element must be a string holding the file
//        path; any other type raises a wrong-type-arg error instead of being
//        read as string data.
//
// Returns the 32-character lowercase hex MD5 digest, or #f when
// md5_file_to_hex reports failure.
static s7_pointer
f_md5_file (s7_scheme* sc, s7_pointer args) {
  s7_pointer path_arg= s7_car (args);
  if (!s7_is_string (path_arg)) {
    return s7_wrong_type_arg_error (sc, "g_md5-file", 1, path_arg, "a string");
  }

  tb_char_t hex_output[33]= {0};
  if (!md5_file_to_hex (s7_string (path_arg), hex_output)) {
    return s7_make_boolean (sc, false);
  }
  return s7_make_string (sc, hex_output);
}

inline void
glue_sha1(s7_scheme* sc) {
const char* name = "g_sha1";
const char* desc = "(g_sha1 str) => string";
glue_define(sc, name, desc, f_sha1, 1, 0);
}

static s7_pointer f_sha256(s7_scheme* sc, s7_pointer args) {
const char* searchString = s7_string(s7_car(args));
tb_size_t len = tb_strlen(searchString);
tb_byte_t ob[32]; // SHA256 produces 32 bytes
tb_char_t hex_output[65] = {0}; // 32 bytes * 2 hex digits per byte + null terminator
tb_sha_t sha;
tb_sha_init(&sha, 256); // TB_SHA_MODE_SHA2_256 = 256
if (len > 0) {
tb_sha_spak(&sha, (tb_byte_t const*)searchString, len);
}
tb_sha_exit(&sha, ob, 32);
for (tb_size_t i = 0; i < 32; ++i) {
tb_snprintf(hex_output + (i << 1), 3, "%02x", ob[i]);
}
return s7_make_string(sc, hex_output);
glue_md5_file (s7_scheme* sc) {
// Registers f_md5_file with the interpreter through glue_define.
// `name` is the identifier Scheme code uses; `desc` is the help text and
// records that the primitive yields a string or #f.
const char* name= "g_md5-file";
const char* desc= "(g_md5-file path) => string|#f";
// NOTE(review): 1, 0 are presumably the required and optional argument
// counts expected by glue_define -- confirm against its definition.
glue_define (sc, name, desc, f_md5_file, 1, 0);
}

// Scheme primitive behind `g_sha1`: returns the SHA-1 digest of a string as a
// 40-character lowercase hex string.
//
// args : argument list whose first element must be a string; any other type
//        raises a wrong-type-arg error instead of being read as string data.
//
// The string's stored length is used rather than a NUL-terminated scan, so
// strings containing embedded NUL characters are hashed in full.
static s7_pointer
f_sha1 (s7_scheme* sc, s7_pointer args) {
  s7_pointer input= s7_car (args);
  if (!s7_is_string (input)) {
    return s7_wrong_type_arg_error (sc, "g_sha1", 1, input, "a string");
  }

  tb_byte_t const* data= (tb_byte_t const*) s7_string (input);
  tb_size_t        len = (tb_size_t) s7_string_length (input);
  tb_byte_t        digest[20];
  tb_char_t        hex_output[41]= {0};
  tb_sha_t         sha;

  // 160 selects SHA-1 (20-byte digest).
  tb_sha_init (&sha, 160);
  if (len > 0) {
    tb_sha_spak (&sha, data, len);
  }
  tb_sha_exit (&sha, digest, sizeof (digest));
  hash_bytes_to_hex (digest, sizeof (digest), hex_output);
  return s7_make_string (sc, hex_output);
}

inline void
glue_sha256(s7_scheme* sc) {
const char* name = "g_sha256";
const char* desc = "(g_sha256 str) => string";
glue_define(sc, name, desc, f_sha256, 1, 0);
glue_sha1 (s7_scheme* sc) {
// Registers f_sha1 with the interpreter through glue_define.
// `name` is the identifier Scheme code uses; `desc` is the help text.
const char* name= "g_sha1";
const char* desc= "(g_sha1 str) => string";
// NOTE(review): 1, 0 are presumably the required and optional argument
// counts expected by glue_define -- confirm against its definition.
glue_define (sc, name, desc, f_sha1, 1, 0);
}

// Scheme primitive behind `g_sha1-file`: hashes the contents of a file.
//
// args : argument list whose first element must be a string holding the file
//        path; any other type raises a wrong-type-arg error instead of being
//        read as string data.
//
// Returns the 40-character lowercase hex SHA-1 digest, or #f when
// sha_file_to_hex reports failure.
static s7_pointer
f_sha1_file (s7_scheme* sc, s7_pointer args) {
  s7_pointer path_arg= s7_car (args);
  if (!s7_is_string (path_arg)) {
    return s7_wrong_type_arg_error (sc, "g_sha1-file", 1, path_arg, "a string");
  }

  tb_char_t hex_output[41]= {0};
  // Mode 160 with a 20-byte digest selects SHA-1.
  if (!sha_file_to_hex (s7_string (path_arg), 160, 20, hex_output)) {
    return s7_make_boolean (sc, false);
  }
  return s7_make_string (sc, hex_output);
}

// Expose f_sha1_file to the interpreter under the Scheme name `g_sha1-file`.
// NOTE(review): the trailing 1, 0 are presumably the required and optional
// argument counts expected by glue_define -- confirm against its definition.
inline void
glue_sha1_file (s7_scheme* sc) {
  glue_define (sc, "g_sha1-file", "(g_sha1-file path) => string|#f", f_sha1_file, 1, 0);
}

// Scheme primitive behind `g_sha256`: returns the SHA-256 digest of a string
// as a 64-character lowercase hex string.
//
// args : argument list whose first element must be a string; any other type
//        raises a wrong-type-arg error instead of being read as string data.
//
// The string's stored length is used rather than a NUL-terminated scan, so
// strings containing embedded NUL characters are hashed in full.
static s7_pointer
f_sha256 (s7_scheme* sc, s7_pointer args) {
  s7_pointer input= s7_car (args);
  if (!s7_is_string (input)) {
    return s7_wrong_type_arg_error (sc, "g_sha256", 1, input, "a string");
  }

  tb_byte_t const* data= (tb_byte_t const*) s7_string (input);
  tb_size_t        len = (tb_size_t) s7_string_length (input);
  tb_byte_t        digest[32];
  tb_char_t        hex_output[65]= {0};
  tb_sha_t         sha;

  // 256 selects SHA-256 (32-byte digest).
  tb_sha_init (&sha, 256);
  if (len > 0) {
    tb_sha_spak (&sha, data, len);
  }
  tb_sha_exit (&sha, digest, sizeof (digest));
  hash_bytes_to_hex (digest, sizeof (digest), hex_output);
  return s7_make_string (sc, hex_output);
}

// Expose f_sha256 to the interpreter under the Scheme name `g_sha256`.
// NOTE(review): the trailing 1, 0 are presumably the required and optional
// argument counts expected by glue_define -- confirm against its definition.
inline void
glue_sha256 (s7_scheme* sc) {
  glue_define (sc, "g_sha256", "(g_sha256 str) => string", f_sha256, 1, 0);
}

// Scheme primitive behind `g_sha256-file`: hashes the contents of a file.
//
// args : argument list whose first element must be a string holding the file
//        path; any other type raises a wrong-type-arg error instead of being
//        read as string data.
//
// Returns the 64-character lowercase hex SHA-256 digest, or #f when
// sha_file_to_hex reports failure.
static s7_pointer
f_sha256_file (s7_scheme* sc, s7_pointer args) {
  s7_pointer path_arg= s7_car (args);
  if (!s7_is_string (path_arg)) {
    return s7_wrong_type_arg_error (sc, "g_sha256-file", 1, path_arg, "a string");
  }

  tb_char_t hex_output[65]= {0};
  // Mode 256 with a 32-byte digest selects SHA-256.
  if (!sha_file_to_hex (s7_string (path_arg), 256, 32, hex_output)) {
    return s7_make_boolean (sc, false);
  }
  return s7_make_string (sc, hex_output);
}

// Expose f_sha256_file to the interpreter under the Scheme name
// `g_sha256-file`.
// NOTE(review): the trailing 1, 0 are presumably the required and optional
// argument counts expected by glue_define -- confirm against its definition.
inline void
glue_sha256_file (s7_scheme* sc) {
  glue_define (sc, "g_sha256-file", "(g_sha256-file path) => string|#f", f_sha256_file, 1, 0);
}

// Install every primitive backing `(liii hashlib)`: the string hashers and
// their file-hashing counterparts, in md5, sha1, sha256 order.
inline void
glue_liii_hashlib (s7_scheme* sc) {
  typedef void (*glue_fn) (s7_scheme*);
  const glue_fn registrars[]= {
      glue_md5,  glue_md5_file,  //
      glue_sha1, glue_sha1_file, //
      glue_sha256, glue_sha256_file,
  };
  const tb_size_t count= sizeof (registrars) / sizeof (registrars[0]);
  for (tb_size_t i= 0; i < count; ++i) {
    registrars[i](sc);
  }
}


Expand Down Expand Up @@ -2102,4 +2236,3 @@ repl_for_community_edition (s7_scheme* sc, int argc, char** argv) {
}

} // namespace goldfish

36 changes: 34 additions & 2 deletions tests/goldfish/liii/hashlib-test.scm
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
; under the License.
;

(import (liii check) (liii hashlib))
(import (liii check) (liii hashlib) (liii path) (liii os))

(check (md5 "") => "d41d8cd98f00b204e9800998ecf8427e")
(check (md5 "hello") => "5d41402abc4b2a76b9719d911017c592")
Expand Down Expand Up @@ -42,4 +42,36 @@
(check (sha256 "!@#$%^&*()") => "95ce789c5c9d18490972709838ca3a9719094bca3ac16332cfec0652b0236141")
(check (sha256 "Hello") => "185f8db32271fe25f561a6fc938b2e264306ec304eda518007d1764826381969")

(check-report)
;; File hashing must agree with string hashing: after writing some text to a
;; scratch file, each *-file digest has to equal the digest of that text.
;; Covered twice: once with non-empty content, once with an empty file.
(let ((scratch "tests/resources/hashlib-test-temp.txt"))
  (let ((payload "hello"))
    (path-write-text scratch payload)
    (check (md5-file scratch) => (md5 payload))
    (check (sha1-file scratch) => (sha1 payload))
    (check (sha256-file scratch) => (sha256 payload)))

  ;; Overwrite the same file with nothing and compare against the empty string.
  (path-write-text scratch "")
  (check (md5-file scratch) => (md5 ""))
  (check (sha1-file scratch) => (sha1 ""))
  (check (sha256-file scratch) => (sha256 ""))

  ;; Remove the scratch file so the test leaves nothing behind.
  (delete-file scratch))

;; Large file (200MB) hash test.
;; Downloads a fixed 200 MB archive with curl and compares its size and digests
;; with known values. The exit status of curl is always checked; the size and
;; digest checks run only when the download succeeded, so a failed download is
;; reported once instead of also probing a missing or partially written file.
;; Whatever ended up at `large-file` is deleted afterwards in either case.
(let* ((large-url "http://ipv4.download.thinkbroadband.com/200MB.zip")
       (large-file "tests/resources/hashlib-test-large-200MB.zip")
       (download-cmd (string-append "curl -L --fail --retry 2 --connect-timeout 30 -o \""
                                    large-file
                                    "\" \""
                                    large-url
                                    "\""))
       (ret (os-call download-cmd)))
  (check ret => 0)
  (when (and (equal? ret 0) (path-exists? large-file))
    (check (path-getsize large-file) => 209715200)
    (check (md5-file large-file) => "3389a0b30e05ef6613ccbdae5d9ec0bd")
    (check (sha1-file large-file) => "fd72443c217d301f8959b5e721f8f0b6fc5eb127")
    (check (sha256-file large-file) => "d14b73150642f30d2342e6620fa537ea273a58b8b751fc5af8f4aabe809f8fc4"))
  (when (path-exists? large-file)
    (delete-file large-file)))



(check-report)