presenter: fixed slide overlay detection for some slide sets #889

This commit is contained in:
Alex Andres 2024-04-18 17:19:20 +02:00
parent 9bfe08f8f3
commit 159f46a59c
No known key found for this signature in database
GPG key ID: 340764C7851D7041
6 changed files with 103 additions and 28 deletions

View file

@ -28,7 +28,6 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.UUID;
import java.util.stream.Stream;
import org.lecturestudio.core.geometry.Dimension2D;
import org.lecturestudio.core.geometry.Rectangle2D;
@ -758,8 +757,8 @@ public class Document {
pages.clear();
int pageCount = pdfDocument.getPageCount();
String[] prevSplitPageText = new String[2];
String[] splitPageText;
List<String> lastPageTextLines = List.of();
for (int number = 0; number < pageCount; number++) {
Page page = new Page(this, number);
@ -772,26 +771,24 @@ public class Document {
}
}
splitPageText = page.getPageText().split("\n");
List<String> pageTextLines = pdfDocument.getPageTextLines(number, 2);
if (splitPageText.length >= 2 && prevSplitPageText.length >= 2 &&
Stream.of(prevSplitPageText[0], prevSplitPageText[1], splitPageText[0], splitPageText[1])
.allMatch(Objects::nonNull)) {
if (prevSplitPageText[0].equals(splitPageText[0]) && prevSplitPageText[1].equals(splitPageText[1])) {
page.setOverlay(true);
List<String> finalLastPageTextLines = lastPageTextLines;
List<String> differences = pageTextLines.stream()
.filter(element -> !finalLastPageTextLines.contains(element)).toList();
if (number > 0) {
Page prevPage = pages.get(number - 1);
prevPage.setOverlay(true);
}
}
else {
prevSplitPageText = splitPageText;
// The pages have equal content, thus mark them as overlay pages.
if (differences.isEmpty()) {
page.setOverlay(true);
if (number > 0) {
Page prevPage = pages.get(number - 1);
prevPage.setOverlay(true);
}
}
else {
prevSplitPageText = splitPageText;
}
lastPageTextLines = pageTextLines;
pages.add(page);
}

View file

@ -130,6 +130,16 @@ public interface DocumentAdapter {
*/
String getPageText(int pageNumber) throws IOException;
/**
 * Get the text of the page as a list where each list entry represents a text line in the page.
 * <p>
 * NOTE(review): implementations may return an empty list when they cannot provide any text
 * lines for the page, so callers should not assume a non-empty result — confirm per adapter.
 *
 * @param pageNumber The page number.
 * @param maxLines   The maximum number of lines to retrieve.
 *
 * @return the text of the page as a list of text lines in the page.
 */
List<String> getPageTextLines(int pageNumber, int maxLines);
/**
* Get the word bounds of the page that has the specified page number.
*

View file

@ -317,6 +317,18 @@ public class PdfDocument {
return muPDFDocument.getPageText(pageNumber);
}
/**
 * Retrieve the text lines of a page by delegating to the wrapped MuPDF document.
 *
 * @param pageNumber The number of the page to read the text from.
 * @param maxLines   The maximum number of lines to retrieve.
 *
 * @return a list in which each entry holds the text of one line of the page.
 */
public List<String> getPageTextLines(int pageNumber, int maxLines) {
    final List<String> textLines = muPDFDocument.getPageTextLines(pageNumber, maxLines);

    return textLines;
}
/**
* Get the word bounds of the page that has the specified page number.
*

View file

@ -41,14 +41,9 @@ import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Constructor;
import java.net.URI;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import org.lecturestudio.core.geometry.Rectangle2D;
import org.lecturestudio.core.io.BitConverter;
@ -201,7 +196,6 @@ public class MuPDFDocument implements DocumentAdapter {
Page page = getPage(pageNumber);
SimpleTextWalker textWalker = new SimpleTextWalker(page.getBounds());
StructuredText structuredText = displayList.toStructuredText();
structuredText.walk(textWalker);
@ -496,6 +490,62 @@ public class MuPDFDocument implements DocumentAdapter {
}
}
/**
 * Get the leading text lines of the page, ordered from the top of the page downwards.
 * <p>
 * Only the first text line of each structured-text block is read. Blocks whose vertical
 * extent intersects the extent of the preceding block are treated as parts of the same
 * visual line and their text is appended to the previously collected line.
 *
 * @param pageNumber The page number.
 * @param maxLines   The maximum number of lines to retrieve.
 *
 * @return the collected text lines of the page; empty if the page yields no text.
 */
@Override
public List<String> getPageTextLines(int pageNumber, int maxLines) {
    synchronized (mutex) {
        List<String> lines = new ArrayList<>();

        DisplayList displayList = getDisplayList(pageNumber);
        StructuredText structuredText = displayList.toStructuredText();

        // Sort page text blocks by their upper edge, so that the page text is ordered top-down.
        // Double.compare returns 0 for equal positions, which keeps the comparator consistent
        // with the Comparator contract (a comparator that never returns 0 may make the sort
        // throw "Comparison method violates its general contract!").
        var sorted = Arrays.stream(structuredText.getBlocks())
                .sorted((o1, o2) -> Double.compare(o1.bbox.y0, o2.bbox.y0))
                .toList();

        // Vertical extent of the previously visited block. The initial values guarantee
        // that the first block never counts as intersecting.
        double lastY0 = Double.MAX_VALUE;
        double lastY1 = Double.MAX_VALUE;

        // Read the first text line from each block.
        for (var block : sorted) {
            // Check if this block vertically intersects the previously visited block.
            boolean intersects = (block.bbox.y0 >= lastY0 && block.bbox.y0 <= lastY1)
                    || (block.bbox.y1 >= lastY0 && block.bbox.y1 <= lastY1);

            if (block.lines.length > 0) {
                StructuredText.TextLine textLine = block.lines[0];

                if (textLine.chars.length > 0) {
                    // Convert the individual chars of the line into a single string.
                    var text = Arrays.stream(textLine.chars)
                            .map(textChar -> Character.toString(textChar.c))
                            .collect(Collectors.joining(""));

                    // If the blocks intersect, merge the text into the last collected line.
                    // The previous block may not have contributed any text, in which case
                    // there is nothing to merge into and the text starts a new line.
                    if (intersects && !lines.isEmpty()) {
                        int index = lines.size() - 1;
                        lines.set(index, lines.get(index) + text);
                    }
                    else {
                        lines.add(text);
                    }

                    if (lines.size() >= maxLines) {
                        break;
                    }
                }
            }

            // Remember the extent of every visited block, including blocks without text.
            lastY0 = block.bbox.y0;
            lastY1 = block.bbox.y1;
        }

        return lines;
    }
}
/**
* Get the display list of the {@link PageEntry} that is mapped to the specified page number.
*

View file

@ -605,6 +605,11 @@ public class PDFBoxDocument implements DocumentAdapter {
return shapes;
}
/**
 * Text line extraction is not implemented for this adapter: the arguments are ignored
 * and an empty, unmodifiable list is always returned.
 * <p>
 * NOTE(review): callers that compare the returned lines between pages cannot
 * distinguish "page has no text" from "not supported" — confirm this is intended.
 *
 * @param pageNumber The page number (unused).
 * @param maxLines   The maximum number of lines to retrieve (unused).
 *
 * @return always an empty list.
 */
@Override
public List<String> getPageTextLines(int pageNumber, int maxLines) {
return List.of();
}
private void loadOutline(PDDocumentCatalog catalog, PDOutlineItem item,
DocumentOutlineItem outline) {
if (isNull(item)) {

View file

@ -455,7 +455,8 @@ public class ThumbPanel extends JPanel {
if (selected) {
g2d.setColor(Color.BLUE);
g2d.fillRect(0, 0, getWidth(), getHeight());
}else if(page.isOverlay()){
}
else if (page.isOverlay()) {
g2d.setColor(Color.GRAY);
g2d.fillRect(0, 0, getWidth(), getHeight());
}