mirror of
https://github.com/toeverything/AFFiNE.git
synced 2026-02-13 21:05:19 +00:00
119 lines
3.4 KiB
TypeScript
119 lines
3.4 KiB
TypeScript
import type { TextRect } from './types';
|
|
|
|
/**
 * A single segment produced by word-granularity segmentation of a string.
 * Segments include words as well as the whitespace and punctuation between them.
 */
interface WordSegment {
  /** The segment's text exactly as it appears in the source string. */
  text: string;
  /** Offset of the segment's first character within the source string. */
  start: number;
  /** Offset just past the segment's last character (`start + text.length`). */
  end: number;
}
|
|
|
|
/**
 * Breaks `text` into word-granularity segments using `Intl.Segmenter` with the
 * runtime's default locale.
 *
 * @param text - The string to segment.
 * @returns Every segment in order, each with its text and its start/end
 * offsets within `text`. Returns an empty array for an empty string.
 */
function getWordSegments(text: string): WordSegment[] {
  const wordSegmenter = new Intl.Segmenter(undefined, { granularity: 'word' });
  const result: WordSegment[] = [];

  for (const piece of wordSegmenter.segment(text)) {
    const start = piece.index;
    result.push({
      text: piece.segment,
      start,
      end: start + piece.segment.length,
    });
  }

  return result;
}
|
|
|
|
/**
 * Distributes the text covered by a DOM range across the range's client rects.
 *
 * `Range.getClientRects()` reports the boxes but not which characters fall in
 * each one, so the split is an estimate: the average character width is taken
 * as (sum of rect widths) / (text length), and whole word segments are packed
 * into each rect in order until the next segment would no longer fit.
 *
 * @param range - Range whose client rects are used.
 * @param fullText - The text covered by `range`.
 * @returns One `{ rect, text }` entry per rect that received text, in rect
 * order. Empty when the range has no client rects.
 */
function getRangeRects(range: Range, fullText: string): TextRect[] {
  const rects = Array.from(range.getClientRects());

  if (rects.length === 0) return [];

  // A single rect holds the whole text; no estimation is needed.
  if (rects.length === 1) {
    return [{ rect: rects[0], text: fullText }];
  }

  const segments = getWordSegments(fullText);

  // Average width per character across all rects.
  const totalWidth = rects.reduce((sum, rect) => sum + rect.width, 0);
  const charWidthEstimate = totalWidth / fullText.length;

  // Hoisted so the pattern is not rebuilt for every segment.
  const punctuationPattern = /^[.,!?;:]$/;
  const lastRectIndex = rects.length - 1;

  const textRects: TextRect[] = [];
  let rectIndex = 0;
  let pendingText = '';
  let pendingWidth = 0;

  for (const segment of segments) {
    const segmentWidth = segment.text.length * charWidthEstimate;
    const isPunctuation = punctuationPattern.test(segment.text.trim());
    const overflowsRect = pendingWidth + segmentWidth > rects[rectIndex].width;

    // Start a new rect only when:
    //  - the segment does not fit in the current one,
    //  - the current rect already has text (a rect is never left empty),
    //  - the segment is not a lone punctuation mark (punctuation always stays
    //    attached to the preceding word), and
    //  - there is a next rect to move to. Because the widths are estimates,
    //    the text can appear to overflow the last rect; that overflow is kept
    //    with the last rect rather than being dropped.
    const startsNewRect =
      overflowsRect &&
      pendingText.length > 0 &&
      !isPunctuation &&
      rectIndex < lastRectIndex;

    if (startsNewRect) {
      textRects.push({ rect: rects[rectIndex], text: pendingText });
      rectIndex++;
      pendingText = segment.text;
      pendingWidth = segmentWidth;
    } else {
      pendingText += segment.text;
      pendingWidth += segmentWidth;
    }
  }

  // Flush whatever is still pending into the rect currently being filled.
  if (pendingText.length > 0) {
    textRects.push({ rect: rects[rectIndex], text: pendingText });
  }

  return textRects;
}
|
|
|
|
/**
 * Finds every non-overlapping occurrence of `sentence` in the first direct
 * text-node child of `element` and returns the client rects covering each
 * occurrence, paired with the text estimated to lie in each rect.
 *
 * @param element - Element whose first direct text-node child is searched.
 * @param sentence - Exact substring to locate.
 * @returns Rect/text pairs for all occurrences, in document order. Empty when
 * `sentence` is empty, when `element` has no direct text-node child, or when
 * the sentence does not occur.
 */
export function getSentenceRects(
  element: Element,
  sentence: string
): TextRect[] {
  // An empty needle matches at every index and never advances the search
  // position, which would loop forever; there is nothing to highlight anyway.
  if (sentence.length === 0) return [];

  const textNode = Array.from(element.childNodes).find(
    node => node.nodeType === Node.TEXT_NODE
  );

  if (!textNode) return [];

  const text = textNode.textContent ?? '';
  const rects: TextRect[] = [];

  let startIndex = text.indexOf(sentence);
  while (startIndex !== -1) {
    const endIndex = startIndex + sentence.length;

    const range = document.createRange();
    range.setStart(textNode, startIndex);
    range.setEnd(textNode, endIndex);

    rects.push(...getRangeRects(range, sentence));

    // Resume after this match so occurrences never overlap.
    startIndex = text.indexOf(sentence, endIndex);
  }

  return rects;
}
|
|
|
|
/**
 * Splits `text` into sentences using `Intl.Segmenter` with the runtime's
 * default locale.
 *
 * @param text - The text to split.
 * @returns The sentence segments in order, each exactly as it appears in
 * `text` (including any trailing whitespace the segmenter attaches to it).
 * Returns an empty array for an empty string.
 */
export function segmentSentences(text: string): string[] {
  const sentenceSegmenter = new Intl.Segmenter(undefined, {
    granularity: 'sentence',
  });
  const sentences: string[] = [];

  for (const piece of sentenceSegmenter.segment(text)) {
    sentences.push(piece.segment);
  }

  return sentences;
}
|