Przeglądaj źródła

fix(store): handle emoji-only filenames in handelize (#302)

Convert emoji codepoints to hex representation (e.g. 🐘 → 1f418) instead
of crashing, so files like 🐘.md can be indexed without halting the
entire update process.

Fixes #302
Ning 2 miesięcy temu
rodzic
commit
dc777e3be0
2 zmienionych plików z 27 dodań i 1 usunięć
  1. 14 1
      src/store.ts
  2. 13 0
      test/store.helpers.unit.test.ts

+ 14 - 1
src/store.ts

@@ -958,16 +958,26 @@ export function getDocid(hash: string): string {
  * - Preserve folder structure (a/b/c/d.md stays structured)
  * - Preserve file extension
  */
/**
 * Replace emoji/symbol codepoints with their lowercase hex representation
 * (e.g. 🐘 → 1f418).
 *
 * A "run" is one or more consecutive characters that are either a Unicode
 * "Other Symbol" (\p{So}), optionally followed by a single nonspacing mark
 * (\p{Mn}, e.g. the U+FE0F variation selector), or a "Modifier Symbol"
 * (\p{Sk}, e.g. emoji skin-tone modifiers). Each run is replaced by the hex
 * codepoints of its symbols joined with "-"; the nonspacing marks are dropped.
 *
 * When a run directly touches a letter or number, a "-" is inserted on that
 * side so the hex digits never fuse with neighbouring text
 * ("notes🐘" → "notes-1f418" rather than "notes1f418").
 *
 * @param str - Text (typically a single path segment) that may contain symbols.
 * @returns The text with every symbol run replaced; input without symbols is
 *   returned unchanged.
 */
function emojiToHex(str: string): string {
  // Compiled once per call rather than once per character.
  const symbolRun = /(?:\p{So}\p{Mn}?|\p{Sk})+/gu;
  const isSymbol = /\p{So}|\p{Sk}/u;
  const endsWithWordChar = /[\p{L}\p{N}]$/u;
  const startsWithWordChar = /^[\p{L}\p{N}]/u;

  return str.replace(symbolRun, (run: string, offset: number, whole: string) => {
    // Iterate by code point (not UTF-16 unit) so astral emoji stay intact.
    const hexParts: string[] = [];
    for (const ch of run) {
      const codePoint = ch.codePointAt(0);
      // Skip the optional nonspacing marks that were matched along with a symbol.
      if (codePoint !== undefined && isSymbol.test(ch)) {
        hexParts.push(codePoint.toString(16));
      }
    }

    // Delimit the hex from adjacent letters/digits; other neighbours
    // (spaces, dots, slashes, "$") already act as separators.
    const lead = endsWithWordChar.test(whole.slice(0, offset)) ? '-' : '';
    const trail = startsWithWordChar.test(whole.slice(offset + run.length)) ? '-' : '';
    return `${lead}${hexParts.join('-')}${trail}`;
  });
}
+
 export function handelize(path: string): string {
   if (!path || path.trim() === '') {
     throw new Error('handelize: path cannot be empty');
   }
 
   // Allow route-style "$" filenames while still rejecting paths with no usable content.
+  // Emoji and other symbols (\p{So}, \p{Sk}) count as valid content — they are converted to hex codepoints below.
   const segments = path.split('/').filter(Boolean);
   const lastSegment = segments[segments.length - 1] || '';
   const filenameWithoutExt = lastSegment.replace(/\.[^.]+$/, '');
-  const hasValidContent = /[\p{L}\p{N}$]/u.test(filenameWithoutExt);
+  const hasValidContent = /[\p{L}\p{N}\p{So}\p{Sk}$]/u.test(filenameWithoutExt);
   if (!hasValidContent) {
     throw new Error(`handelize: path "${path}" has no valid filename content`);
   }
@@ -979,6 +989,9 @@ export function handelize(path: string): string {
     .map((segment, idx, arr) => {
       const isLastSegment = idx === arr.length - 1;
 
+      // Convert emoji to hex codepoints before cleaning
+      segment = emojiToHex(segment);
+
       if (isLastSegment) {
         // For the filename (last segment), preserve the extension
         const extMatch = segment.match(/(\.[a-z0-9]+)$/i);

+ 13 - 0
test/store.helpers.unit.test.ts

@@ -137,6 +137,19 @@ describe("handelize", () => {
     expect(handelize("日本語-notes.md")).toBe("日本語-notes.md");
   });
 
+  test("handles emoji filenames (issue #302)", () => {
+    // Emoji-only filenames should convert to hex codepoints
+    expect(handelize("🐘.md")).toBe("1f418.md");
+    expect(handelize("🎉.md")).toBe("1f389.md");
+    // Emoji mixed with text
+    expect(handelize("notes 🐘.md")).toBe("notes-1f418.md");
+    expect(handelize("🐘 elephant.md")).toBe("1f418-elephant.md");
+    // Multiple emojis
+    expect(handelize("🐘🎉.md")).toBe("1f418-1f389.md");
+    // Emoji in directory names
+    expect(handelize("🐘/notes.md")).toBe("1f418/notes.md");
+  });
+
   test("handles dates and times in filenames", () => {
     expect(handelize("meeting-2025-01-15.md")).toBe("meeting-2025-01-15.md");
     expect(handelize("notes 2025/01/15.md")).toBe("notes-2025/01/15.md");