// AFFiNE-Mirror/blocksuite/playground/examples/renderer/text-utils.ts

import type { TextRect } from './types';

/**
 * A single piece of text produced by word-granularity segmentation, together
 * with its position inside the string it was cut from.
 */
interface WordSegment {
  /** The segment's text exactly as it appears in the source string. */
  text: string;
  /** Offset of the segment's first character in the source string. */
  start: number;
  /** Offset just past the segment's last character (`start + text.length`). */
  end: number;
}

/**
 * Breaks `text` into word-granularity segments using `Intl.Segmenter` with the
 * runtime's default locale.
 *
 * Every segment the segmenter yields is kept (words, whitespace and
 * punctuation alike), so concatenating the returned `text` fields in order
 * reproduces the input.
 *
 * @param text - The string to segment.
 * @returns The segments in source order, each with its start offset and
 *   end-exclusive offset.
 */
function getWordSegments(text: string): WordSegment[] {
  const wordSegmenter = new Intl.Segmenter(undefined, { granularity: 'word' });
  const pieces: WordSegment[] = [];

  for (const { segment, index } of wordSegmenter.segment(text)) {
    pieces.push({
      text: segment,
      start: index,
      end: index + segment.length,
    });
  }

  return pieces;
}

/**
 * Distributes the text covered by `range` over the client rects reported for
 * that range, returning one `{ rect, text }` entry per rect that receives text.
 *
 * The split is an estimate, not a measurement: the summed rect width divided
 * by the text length gives an average per-character width, and word segments
 * are packed greedily into each rect until the next word would overflow it.
 *
 * Guarantees:
 * - No rects: returns an empty array.
 * - Exactly one rect: the whole `fullText` is assigned to it.
 * - Otherwise the returned texts, concatenated in order, equal `fullText`.
 *   Text that (by estimate) overflows the final rect stays with the final rect
 *   instead of being dropped.
 *
 * @param range - Range whose `getClientRects()` result supplies the rects.
 * @param fullText - The text covered by `range`.
 * @returns Rect/text pairs in rect order.
 */
function getRangeRects(range: Range, fullText: string): TextRect[] {
  const rects = Array.from(range.getClientRects());

  if (rects.length === 0) return [];

  // With a single rect there is nothing to split: it owns the full text.
  if (rects.length === 1) {
    return [{ rect: rects[0], text: fullText }];
  }

  const segments = getWordSegments(fullText);

  // Average width per character, used to estimate each segment's width.
  const totalWidth = rects.reduce((sum, rect) => sum + rect.width, 0);
  const charWidthEstimate = totalWidth / fullText.length;

  // Hoisted so the pattern is not rebuilt for every segment.
  const punctuationPattern = /^[.,!?;:]$/;
  const lastRectIndex = rects.length - 1;

  const textRects: TextRect[] = [];
  let currentRect = 0;
  let currentText = '';
  let currentWidth = 0;

  for (const segment of segments) {
    const segmentWidth = segment.text.length * charWidthEstimate;
    const isPunctuation = punctuationPattern.test(segment.text.trim());

    // Move on to the next rect only when all of the following hold:
    //  - the current rect already holds some text,
    //  - the segment is not a lone punctuation mark (punctuation always stays
    //    attached to the text before it, even if that slightly overflows),
    //  - there is a next rect to move to (text overflowing the last rect is
    //    kept there so nothing is lost to estimation error),
    //  - the segment does not fit by estimate.
    const shouldWrap =
      currentText.length > 0 &&
      !isPunctuation &&
      currentRect < lastRectIndex &&
      currentWidth + segmentWidth > rects[currentRect].width;

    if (shouldWrap) {
      textRects.push({ rect: rects[currentRect], text: currentText });
      currentRect++;
      currentText = segment.text;
      currentWidth = segmentWidth;
    } else {
      currentText += segment.text;
      currentWidth += segmentWidth;
    }
  }

  // Flush whatever is still pending for the rect we ended on.
  if (currentText.length > 0) {
    textRects.push({ rect: rects[currentRect], text: currentText });
  }

  return textRects;
}

/**
 * Finds every non-overlapping occurrence of `sentence` in the first direct
 * text-node child of `element` and returns the rect/text pairs that
 * `getRangeRects` produces for each occurrence.
 *
 * Only the first child whose `nodeType` is `Node.TEXT_NODE` is searched;
 * other children are not inspected.
 *
 * @param element - Element whose first direct text node is searched.
 * @param sentence - The exact substring to locate. An empty string yields no
 *   results.
 * @returns Rect/text pairs for all occurrences, in document order; empty when
 *   there is no text node, no match, or `sentence` is empty.
 */
export function getSentenceRects(
  element: Element,
  sentence: string
): TextRect[] {
  // An empty needle matches at every index without advancing the cursor,
  // which would make the search loop below spin forever.
  if (sentence.length === 0) return [];

  const textNode = Array.from(element.childNodes).find(
    node => node.nodeType === Node.TEXT_NODE
  );

  if (!textNode) return [];

  const text = textNode.textContent ?? '';
  const rects: TextRect[] = [];

  // Walk the occurrences, resuming just past each match so matches never
  // overlap.
  for (
    let startIndex = text.indexOf(sentence);
    startIndex !== -1;
    startIndex = text.indexOf(sentence, startIndex + sentence.length)
  ) {
    const range = document.createRange();
    range.setStart(textNode, startIndex);
    range.setEnd(textNode, startIndex + sentence.length);

    rects.push(...getRangeRects(range, sentence));
  }

  return rects;
}

/**
 * Splits `text` into sentences using `Intl.Segmenter` with the runtime's
 * default locale.
 *
 * Each returned string is a segment exactly as the segmenter yields it, so
 * joining the result reproduces the input.
 *
 * @param text - The text to split.
 * @returns The sentence segments in source order.
 */
export function segmentSentences(text: string): string[] {
  const sentenceSegmenter = new Intl.Segmenter(undefined, {
    granularity: 'sentence',
  });
  const sentences: string[] = [];

  for (const part of sentenceSegmenter.segment(text)) {
    sentences.push(part.segment);
  }

  return sentences;
}