Files
AFFiNE-Mirror/blocksuite/playground/examples/renderer/text-utils.ts

136 lines
3.4 KiB
TypeScript

import type { TextRect } from './types';
interface WordSegment {
text: string;
start: number;
end: number;
}
const CJK_REGEX = /[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF]/u;
function hasCJK(text: string): boolean {
return CJK_REGEX.test(text);
}
function getWordSegments(text: string): WordSegment[] {
const granularity = hasCJK(text) ? 'grapheme' : 'word';
const segmenter = new Intl.Segmenter(undefined, { granularity });
return Array.from(segmenter.segment(text)).map(({ segment, index }) => ({
text: segment,
start: index,
end: index + segment.length,
}));
}
function getRangeRects(range: Range, fullText: string): TextRect[] {
const rects = Array.from(range.getClientRects());
const textRects: TextRect[] = [];
if (rects.length === 0) return textRects;
// If there's only one rect, use the full text
if (rects.length === 1) {
const rect = rects[0];
textRects.push({
rect: {
x: rect.x,
y: rect.y,
w: rect.width,
h: rect.height,
},
text: fullText,
});
return textRects;
}
const segments = getWordSegments(fullText);
// Calculate the total width and average width per character
const totalWidth = Math.floor(
rects.reduce((sum, rect) => sum + rect.width, 0)
);
const charWidthEstimate = totalWidth / fullText.length;
let currentRect = 0;
let currentSegments: WordSegment[] = [];
let currentWidth = 0;
segments.forEach(segment => {
const segmentWidth = segment.text.length * charWidthEstimate;
if (
currentWidth + segmentWidth > rects[currentRect]?.width &&
currentSegments.length > 0
) {
const rect = rects[currentRect];
textRects.push({
rect: {
x: rect.x,
y: rect.y,
w: rect.width,
h: rect.height,
},
text: currentSegments.map(seg => seg.text).join(''),
});
currentRect++;
currentSegments = [segment];
currentWidth = segmentWidth;
} else {
currentSegments.push(segment);
currentWidth += segmentWidth;
}
});
// Handle remaining segments if any
if (currentSegments.length > 0) {
const rect = rects[Math.min(currentRect, rects.length - 1)];
textRects.push({
rect: {
x: rect.x,
y: rect.y,
w: rect.width,
h: rect.height,
},
text: currentSegments.map(seg => seg.text).join(''),
});
}
return textRects;
}
export function getSentenceRects(
element: Element,
sentence: string
): TextRect[] {
const textNode = Array.from(element.childNodes).find(
node => node.nodeType === Node.TEXT_NODE
);
if (!textNode) return [];
const text = textNode.textContent || '';
let rects: TextRect[] = [];
let startIndex = 0;
// Find all occurrences of the sentence and ensure we capture complete words
while ((startIndex = text.indexOf(sentence, startIndex)) !== -1) {
const range = document.createRange();
let endIndex = startIndex + sentence.length;
range.setStart(textNode, startIndex);
range.setEnd(textNode, endIndex);
rects = rects.concat(
getRangeRects(range, text.slice(startIndex, endIndex))
);
startIndex = endIndex;
}
return rects;
}
export function segmentSentences(text: string): string[] {
const segmenter = new Intl.Segmenter(undefined, { granularity: 'sentence' });
return Array.from(segmenter.segment(text)).map(({ segment }) => segment);
}