mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-13 04:48:53 +00:00
136 lines
3.4 KiB
TypeScript
136 lines
3.4 KiB
TypeScript
import type { TextRect } from './types';
|
|
|
|
interface WordSegment {
|
|
text: string;
|
|
start: number;
|
|
end: number;
|
|
}
|
|
|
|
const CJK_REGEX = /[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF]/u;
|
|
|
|
function hasCJK(text: string): boolean {
|
|
return CJK_REGEX.test(text);
|
|
}
|
|
|
|
function getWordSegments(text: string): WordSegment[] {
|
|
const granularity = hasCJK(text) ? 'grapheme' : 'word';
|
|
const segmenter = new Intl.Segmenter(undefined, { granularity });
|
|
return Array.from(segmenter.segment(text)).map(({ segment, index }) => ({
|
|
text: segment,
|
|
start: index,
|
|
end: index + segment.length,
|
|
}));
|
|
}
|
|
|
|
function getRangeRects(range: Range, fullText: string): TextRect[] {
|
|
const rects = Array.from(range.getClientRects());
|
|
const textRects: TextRect[] = [];
|
|
|
|
if (rects.length === 0) return textRects;
|
|
|
|
// If there's only one rect, use the full text
|
|
if (rects.length === 1) {
|
|
const rect = rects[0];
|
|
textRects.push({
|
|
rect: {
|
|
x: rect.x,
|
|
y: rect.y,
|
|
w: rect.width,
|
|
h: rect.height,
|
|
},
|
|
text: fullText,
|
|
});
|
|
return textRects;
|
|
}
|
|
|
|
const segments = getWordSegments(fullText);
|
|
|
|
// Calculate the total width and average width per character
|
|
const totalWidth = Math.floor(
|
|
rects.reduce((sum, rect) => sum + rect.width, 0)
|
|
);
|
|
const charWidthEstimate = totalWidth / fullText.length;
|
|
|
|
let currentRect = 0;
|
|
let currentSegments: WordSegment[] = [];
|
|
let currentWidth = 0;
|
|
|
|
segments.forEach(segment => {
|
|
const segmentWidth = segment.text.length * charWidthEstimate;
|
|
if (
|
|
currentWidth + segmentWidth > rects[currentRect]?.width &&
|
|
currentSegments.length > 0
|
|
) {
|
|
const rect = rects[currentRect];
|
|
textRects.push({
|
|
rect: {
|
|
x: rect.x,
|
|
y: rect.y,
|
|
w: rect.width,
|
|
h: rect.height,
|
|
},
|
|
text: currentSegments.map(seg => seg.text).join(''),
|
|
});
|
|
|
|
currentRect++;
|
|
currentSegments = [segment];
|
|
currentWidth = segmentWidth;
|
|
} else {
|
|
currentSegments.push(segment);
|
|
currentWidth += segmentWidth;
|
|
}
|
|
});
|
|
|
|
// Handle remaining segments if any
|
|
if (currentSegments.length > 0) {
|
|
const rect = rects[Math.min(currentRect, rects.length - 1)];
|
|
textRects.push({
|
|
rect: {
|
|
x: rect.x,
|
|
y: rect.y,
|
|
w: rect.width,
|
|
h: rect.height,
|
|
},
|
|
text: currentSegments.map(seg => seg.text).join(''),
|
|
});
|
|
}
|
|
|
|
return textRects;
|
|
}
|
|
|
|
export function getSentenceRects(
|
|
element: Element,
|
|
sentence: string
|
|
): TextRect[] {
|
|
const textNode = Array.from(element.childNodes).find(
|
|
node => node.nodeType === Node.TEXT_NODE
|
|
);
|
|
|
|
if (!textNode) return [];
|
|
|
|
const text = textNode.textContent || '';
|
|
let rects: TextRect[] = [];
|
|
let startIndex = 0;
|
|
|
|
// Find all occurrences of the sentence and ensure we capture complete words
|
|
while ((startIndex = text.indexOf(sentence, startIndex)) !== -1) {
|
|
const range = document.createRange();
|
|
let endIndex = startIndex + sentence.length;
|
|
|
|
range.setStart(textNode, startIndex);
|
|
range.setEnd(textNode, endIndex);
|
|
|
|
rects = rects.concat(
|
|
getRangeRects(range, text.slice(startIndex, endIndex))
|
|
);
|
|
startIndex = endIndex;
|
|
}
|
|
|
|
return rects;
|
|
}
|
|
|
|
export function segmentSentences(text: string): string[] {
|
|
const segmenter = new Intl.Segmenter(undefined, { granularity: 'sentence' });
|
|
return Array.from(segmenter.segment(text)).map(({ segment }) => segment);
|
|
}
|