PDF ekleme ve metin düzenleme eklendi

This commit is contained in:
2025-11-18 00:07:17 +03:00
parent b6155c3fcb
commit 49024fa6c9
5 changed files with 262 additions and 63 deletions

View File

@@ -17,6 +17,7 @@
"@mui/material": "^6.1.1",
"@supabase/supabase-js": "^2.81.1",
"jszip": "^3.10.1",
"pdfjs-dist": "^5.4.394",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-dropzone": "^14.2.3",

View File

@@ -5,6 +5,7 @@ import {
Button,
LinearProgress,
Stack,
TextField,
Typography,
} from '@mui/material';
import { useNavigate } from 'react-router-dom';
@@ -22,9 +23,11 @@ const OcrStep = () => {
const clearTranslation = useAppStore((state) => state.clearTranslation);
const bookMetadata = useAppStore((state) => state.bookMetadata);
const epubImports = useAppStore((state) => state.epubImports);
const pdfImports = useAppStore((state) => state.pdfImports);
const [status, setStatus] = useState('idle');
const [currentIndex, setCurrentIndex] = useState(0);
const [previewText, setPreviewText] = useState('');
const [isEditing, setIsEditing] = useState(false);
const total = croppedImages.length;
const abortRef = useRef(false);
@@ -39,6 +42,12 @@ const OcrStep = () => {
const [workerReady, setWorkerReady] = useState(false);
const previewRef = useRef(null);
// removed auto navigation to translation
// Change handler for the editable preview field: while edit mode is on,
// writes the typed text to both the preview state and the OCR text setter.
// Does nothing when edit mode is off.
const handlePreviewChange = (event) => {
  if (isEditing) {
    const { value: nextText } = event.target;
    setPreviewText(nextText);
    setOcrText(nextText);
  }
};
const orderedImages = useMemo(
() => [...croppedImages].sort((a, b) => (a.order ?? 0) - (b.order ?? 0)),
@@ -119,13 +128,15 @@ const OcrStep = () => {
}, [assetBase, isDev, orderedImages.length, setError]);
useEffect(() => {
if (!orderedImages.length) return;
if (!orderedImages.length && !(epubImports.length || pdfImports.length)) return;
setStatus('idle');
setCurrentIndex(0);
setPreviewText('');
setOcrText('');
setPreviewText(ocrText || '');
if (!ocrText) {
setOcrText('');
}
clearTranslation();
}, [clearTranslation, orderedImages, setOcrText]);
}, [clearTranslation, epubImports.length, orderedImages, pdfImports.length, ocrText, setOcrText]);
useEffect(() => {
if (previewRef.current) {
@@ -176,7 +187,7 @@ const OcrStep = () => {
}, [orderedImages, setError, setOcrText, status, total, workerReady]);
if (!orderedImages.length && !epubImports.length) {
if (!orderedImages.length && !epubImports.length && !pdfImports.length) {
return (
<Stack spacing={2}>
<Alert severity="info">Önce görselleri cropla.</Alert>
@@ -187,7 +198,8 @@ const OcrStep = () => {
);
}
if (!orderedImages.length && epubImports.length) {
if (!orderedImages.length && (epubImports.length || pdfImports.length)) {
const documentCount = epubImports.length + pdfImports.length;
return (
<Stack spacing={4}>
{bookMetadata && (
@@ -197,30 +209,50 @@ const OcrStep = () => {
</Typography>
)}
<Box textAlign="center">
<Typography variant="h5">EPUB metni ayrıştırıldı</Typography>
<Typography variant="h5">Belge metni ayrıştırıldı</Typography>
<Typography color="text.secondary">
{epubImports.length > 1
? `${epubImports.length} EPUB dosyasından metin çıkarıldı.`
: 'EPUB dosyasındaki metin çıkarıldı.'}
{documentCount > 1
? `${documentCount} belge dosyasından metin çıkarıldı.`
: 'Yüklediğin belgedeki metin çıkarıldı.'}
</Typography>
</Box>
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
<Typography variant="subtitle1">Ön izleme</Typography>
<Box
ref={previewRef}
sx={{
mt: 1,
maxHeight: '10em',
overflowY: 'auto',
whiteSpace: 'pre-wrap',
lineHeight: 1.5,
fontSize: '0.95rem',
color: 'text.secondary',
pr: 1,
}}
>
{ocrText?.trim() || 'Metin ayrıştırılıyor...'}
</Box>
<Stack direction="row" alignItems="center" justifyContent="space-between">
<Typography variant="subtitle1">Ön izleme</Typography>
<Button
size="small"
variant={isEditing ? 'contained' : 'outlined'}
onClick={() => setIsEditing((prev) => !prev)}
>
{isEditing ? 'Görünüme geç' : 'Metni düzenle'}
</Button>
</Stack>
{isEditing ? (
<TextField
fullWidth
multiline
minRows={6}
value={previewText}
onChange={handlePreviewChange}
sx={{ mt: 1 }}
/>
) : (
<Box
ref={previewRef}
sx={{
mt: 1,
maxHeight: '10em',
overflowY: 'auto',
whiteSpace: 'pre-wrap',
lineHeight: 1.5,
fontSize: '0.95rem',
color: 'text.secondary',
pr: 1,
}}
>
{previewText || ocrText?.trim() || 'Metin ayrıştırılıyor...'}
</Box>
)}
</Box>
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
<Button variant="contained" onClick={() => navigate('/')}>
@@ -272,22 +304,42 @@ const OcrStep = () => {
</Typography>
</Box>
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
<Typography variant="subtitle1">Ön izleme</Typography>
<Box
ref={previewRef}
sx={{
mt: 1,
maxHeight: '8.5em',
overflowY: 'auto',
whiteSpace: 'pre-wrap',
lineHeight: 1.5,
fontSize: '0.95rem',
color: 'text.secondary',
pr: 1,
}}
>
{previewText || 'Metin bekleniyor'}
</Box>
<Stack direction="row" alignItems="center" justifyContent="space-between">
<Typography variant="subtitle1">Ön izleme</Typography>
<Button
size="small"
variant={isEditing ? 'contained' : 'outlined'}
onClick={() => setIsEditing((prev) => !prev)}
>
{isEditing ? 'Görünüme geç' : 'Metni düzenle'}
</Button>
</Stack>
{isEditing ? (
<TextField
fullWidth
multiline
minRows={6}
value={previewText}
onChange={handlePreviewChange}
sx={{ mt: 1 }}
/>
) : (
<Box
ref={previewRef}
sx={{
mt: 1,
maxHeight: '8.5em',
overflowY: 'auto',
whiteSpace: 'pre-wrap',
lineHeight: 1.5,
fontSize: '0.95rem',
color: 'text.secondary',
pr: 1,
}}
>
{previewText || 'Metin bekleniyor'}
</Box>
)}
</Box>
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
<Button variant="contained" onClick={() => navigate('/bulk-crop')}>

View File

@@ -19,6 +19,7 @@ import {
import { useNavigate } from 'react-router-dom';
import { useAppStore } from '../store/useAppStore';
import { extractTextFromEpub } from '../utils/epubImport';
import { extractTextFromPdf } from '../utils/pdfImport';
const dropzoneStyle = {
border: '2px dashed rgba(108, 155, 207, 0.7)',
@@ -42,6 +43,8 @@ const UploadStep = () => {
const setBookMetadata = useAppStore((state) => state.setBookMetadata);
const epubImports = useAppStore((state) => state.epubImports);
const setEpubImports = useAppStore((state) => state.setEpubImports);
const pdfImports = useAppStore((state) => state.pdfImports);
const setPdfImports = useAppStore((state) => state.setPdfImports);
const setOcrText = useAppStore((state) => state.setOcrText);
const clearTranslation = useAppStore((state) => state.clearTranslation);
const setError = useAppStore((state) => state.setError);
@@ -51,12 +54,12 @@ const UploadStep = () => {
const [selectedBookId, setSelectedBookId] = useState(bookMetadata?.id || null);
const skipSearchRef = useRef(false);
const [showResults, setShowResults] = useState(false);
const [epubProcessing, setEpubProcessing] = useState(false);
const [docProcessing, setDocProcessing] = useState(false);
const onDrop = useCallback(
async (acceptedFiles) => {
if (!acceptedFiles.length) return;
setEpubProcessing(true);
setDocProcessing(true);
const preservedMetadata = bookMetadata;
const preservedTitle = bookTitle;
resetFromStep('upload');
@@ -71,11 +74,19 @@ const UploadStep = () => {
const imageFiles = [];
const epubFiles = [];
const pdfFiles = [];
acceptedFiles.forEach((file) => {
const lowerName = file.name?.toLowerCase() || '';
const isEpub =
file.type === 'application/epub+zip' || file.name?.toLowerCase().endsWith('.epub');
file.type === 'application/epub+zip' ||
file.type === 'application/zip' ||
file.type === 'application/x-zip-compressed' ||
lowerName.endsWith('.epub');
const isPdf = file.type === 'application/pdf' || lowerName.endsWith('.pdf');
if (isEpub) {
epubFiles.push(file);
} else if (isPdf) {
pdfFiles.push(file);
} else {
imageFiles.push(file);
}
@@ -92,13 +103,13 @@ const UploadStep = () => {
setUploadedImages([...uploadedImages, ...mapped]);
}
const importedEntries = [];
const importedEpubEntries = [];
if (epubFiles.length) {
for (const file of epubFiles) {
try {
// eslint-disable-next-line no-await-in-loop
const parsed = await extractTextFromEpub(file);
importedEntries.push({
importedEpubEntries.push({
id: crypto.randomUUID(),
filename: file.name,
size: file.size,
@@ -109,23 +120,54 @@ const UploadStep = () => {
setError(error.message || `${file.name} okunamadı.`);
}
}
setEpubImports(importedEntries);
setEpubImports(importedEpubEntries);
} else {
setEpubImports([]);
}
const importedPdfEntries = [];
if (pdfFiles.length) {
for (const file of pdfFiles) {
try {
// eslint-disable-next-line no-await-in-loop
const parsed = await extractTextFromPdf(file);
importedPdfEntries.push({
id: crypto.randomUUID(),
filename: file.name,
size: file.size,
text: parsed.text,
metadata: parsed.metadata,
});
} catch (error) {
setError(error.message || `${file.name} okunamadı.`);
}
}
setPdfImports(importedPdfEntries);
} else {
setPdfImports([]);
}
if (importedEpubEntries.length || importedPdfEntries.length) {
clearTranslation();
const combinedText = importedEntries.map((entry) => entry.text).filter(Boolean).join('\n\n');
const combinedText = [...importedEpubEntries, ...importedPdfEntries]
.map((entry) => entry.text)
.filter(Boolean)
.join('\n\n');
if (combinedText) {
setOcrText(combinedText);
}
if (!preservedMetadata && importedEntries[0]?.metadata) {
const meta = importedEntries[0].metadata;
const firstMeta = importedEpubEntries[0]?.metadata || importedPdfEntries[0]?.metadata;
if (!preservedMetadata && firstMeta) {
const meta = firstMeta;
setBookMetadata({
id: `epub-${crypto.randomUUID()}`,
title: meta.title || bookTitle || 'İsimsiz EPUB',
id: `doc-${crypto.randomUUID()}`,
title: meta.title || bookTitle || 'İsimsiz belge',
subtitle: '',
authors: meta.authors || [],
publisher: meta.publisher || '',
publishedDate: meta.publishedDate || '',
description: meta.description || '',
pageCount: null,
pageCount: meta.pageCount || null,
categories: meta.categories || [],
averageRating: null,
ratingsCount: null,
@@ -139,10 +181,10 @@ const UploadStep = () => {
setBookTitle(meta.title);
}
}
} else {
setEpubImports([]);
} else if (!preservedMetadata) {
setBookMetadata(null);
}
setEpubProcessing(false);
setDocProcessing(false);
},
[
bookMetadata,
@@ -152,6 +194,7 @@ const UploadStep = () => {
setBookMetadata,
setBookTitle,
setEpubImports,
setPdfImports,
setError,
setOcrText,
setUploadedImages,
@@ -259,6 +302,7 @@ const UploadStep = () => {
'application/zip': ['.epub'],
'application/x-zip-compressed': ['.epub'],
'application/octet-stream': ['.epub'],
'application/pdf': ['.pdf'],
},
multiple: true,
});
@@ -303,8 +347,8 @@ const UploadStep = () => {
}, [bookMetadata]);
const hasImages = uploadedImages.length > 0;
const hasEpubImports = epubImports.length > 0;
const canProceed = hasImages || hasEpubImports;
const hasDocumentImports = epubImports.length > 0 || pdfImports.length > 0;
const canProceed = hasImages || hasDocumentImports;
const nextPath = hasImages ? '/crop' : '/ocr';
return (
@@ -466,10 +510,10 @@ const UploadStep = () => {
<Box {...getRootProps()} sx={dropzoneStyle}>
<input {...getInputProps()} />
<Typography variant="h5" gutterBottom>
Görselleri veya EPUB dosyasını sürükleyip bırak ya da tıkla
Görselleri, EPUB veya PDF dosyalarını sürükleyip bırak ya da tıkla
</Typography>
<Typography color="text.secondary" gutterBottom>
.png, .jpg, .jpeg formatlarında çoklu görsel ya da .epub dosyaları yükleyebilirsin.
.png, .jpg, .jpeg, .webp görsellerinin yanı sıra .epub ve .pdf belgeleri yükleyebilirsin.
</Typography>
<Button variant="contained" color="primary">
Dosya seç
@@ -479,9 +523,9 @@ const UploadStep = () => {
Bırak ve yükleyelim!
</Typography>
)}
{epubProcessing && (
{docProcessing && (
<Typography mt={2} color="text.secondary">
EPUB içeriği ayrıştırılıyor...
Belge içeriği ayrıştırılıyor...
</Typography>
)}
</Box>
@@ -524,6 +568,44 @@ const UploadStep = () => {
</Box>
)}
{pdfImports.length > 0 && (
<Box>
<Typography variant="h6" gutterBottom>
Yüklenen PDF dosyaları ({pdfImports.length})
</Typography>
<Stack spacing={1.5}>
{pdfImports.map((item) => (
<Paper key={item.id} variant="outlined" sx={{ p: 2, borderRadius: 2 }}>
<Typography variant="subtitle1" sx={{ fontWeight: 600 }}>
{item.metadata?.title || item.filename}
</Typography>
{item.metadata?.authors?.length ? (
<Typography variant="body2" color="text.secondary" sx={{ fontStyle: 'italic' }}>
{item.metadata.authors.join(', ')}
</Typography>
) : (
<Typography variant="body2" color="text.secondary">
Yazar bilgisi bulunamadı
</Typography>
)}
<Typography variant="caption" color="text.secondary" display="block" mt={0.5}>
{[
item.metadata?.publisher,
item.metadata?.language ? item.metadata.language.toUpperCase() : null,
item.metadata?.pageCount ? `${item.metadata.pageCount} sayfa` : null,
]
.filter(Boolean)
.join(' • ')}
</Typography>
<Typography variant="caption" color="text.secondary" display="block">
{item.filename} {(item.size / (1024 * 1024)).toFixed(2)} MB
</Typography>
</Paper>
))}
</Stack>
</Box>
)}
<Box>
<Typography variant="h6" gutterBottom>
Yüklenen görseller ({uploadedImages.length})

View File

@@ -45,6 +45,7 @@ export const useAppStore = create((set) => ({
croppedCoverImage: null,
ocrText: '',
epubImports: [],
pdfImports: [],
bookTitle: '',
bookMetadata: null,
translatedText: '',
@@ -76,6 +77,7 @@ export const useAppStore = create((set) => ({
}),
setOcrText: (text) => set({ ocrText: text }),
setEpubImports: (imports) => set({ epubImports: imports }),
setPdfImports: (imports) => set({ pdfImports: imports }),
setBookTitle: (title) => set({ bookTitle: title }),
setBookMetadata: (metadata) => set({ bookMetadata: metadata }),
setTranslatedText: (text) => set({ translatedText: text }),
@@ -154,6 +156,7 @@ export const useAppStore = create((set) => ({
draft.croppedCoverImage = null;
draft.ocrText = '';
draft.epubImports = [];
draft.pdfImports = [];
draft.bookTitle = '';
draft.bookMetadata = null;
draft.translatedText = '';

61
src/utils/pdfImport.js Normal file
View File

@@ -0,0 +1,61 @@
import { GlobalWorkerOptions, getDocument } from 'pdfjs-dist/build/pdf.mjs';
import pdfWorker from 'pdfjs-dist/build/pdf.worker.mjs?url';
GlobalWorkerOptions.workerSrc = pdfWorker;
/**
 * Normalizes whitespace in a piece of extracted text.
 *
 * Removes carriage returns, turns tabs into spaces, strips trailing
 * whitespace from each line, limits consecutive newlines to two (one blank
 * line), collapses runs of spaces / non-breaking spaces into a single space
 * and trims the result.
 *
 * @param {string} [value=''] - Raw text.
 * @returns {string} The normalized text.
 */
const cleanText = (value = '') =>
  value
    .replace(/\r/g, '')
    .replace(/\t/g, ' ')
    // Only strip non-newline whitespace before a line break. Using `\s+` here
    // would also consume the preceding newlines, flattening every blank line
    // and making the `\n{3,}` rule below unreachable.
    .replace(/[^\S\n]+\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .replace(/[ \u00A0]{2,}/g, ' ')
    .trim();
/**
 * Builds the plain text of a single PDF page.
 *
 * Reads the page's text content, takes each item's `str` (or `unicode` when
 * the item has no `str` property), drops empty values, collapses whitespace
 * inside every fragment, joins the fragments with single spaces and passes
 * the result through `cleanText`.
 *
 * @param {object} page - Page object exposing an async `getTextContent()`.
 * @returns {Promise<string>} The cleaned text of the page.
 */
const extractPageText = async (page) => {
  const { items } = await page.getTextContent();
  const fragments = [];
  for (const entry of items) {
    const raw = 'str' in entry ? entry.str : entry?.unicode;
    if (raw) {
      fragments.push(raw.replace(/\s+/g, ' ').trim());
    }
  }
  return cleanText(fragments.join(' '));
};
/**
 * Extracts the text and basic metadata of a PDF file.
 *
 * Pages are read one at a time, in order; pages that yield no text are
 * skipped and the remaining page texts are joined with a blank line.
 * Metadata is best-effort: if it cannot be read, empty defaults are used.
 * The pdf.js loading task is always destroyed before returning or throwing
 * so its resources are released.
 *
 * @param {File} file - File-like object providing `arrayBuffer()`, `name` and `size`.
 * @returns {Promise<{text: string, metadata: object}>} Combined text plus
 *   title, authors, publisher, language, publishedDate, description,
 *   identifiers, filename, fileSize and pageCount.
 * @throws Rejects when the file cannot be read or the PDF cannot be parsed.
 */
export const extractTextFromPdf = async (file) => {
  const arrayBuffer = await file.arrayBuffer();
  const loadingTask = getDocument({ data: arrayBuffer });
  try {
    const pdf = await loadingTask.promise;
    const pageCount = pdf.numPages;
    const texts = [];
    for (let pageNumber = 1; pageNumber <= pageCount; pageNumber += 1) {
      // eslint-disable-next-line no-await-in-loop
      const page = await pdf.getPage(pageNumber);
      // eslint-disable-next-line no-await-in-loop
      const text = await extractPageText(page);
      if (text) {
        texts.push(text);
      }
    }
    let info = {};
    try {
      // pdf.getMetadata() resolves with info + metadata.
      const metadata = await pdf.getMetadata();
      info = metadata?.info || {};
    } catch {
      // Metadata is optional: keep the extracted text and fall back to defaults.
      info = {};
    }
    return {
      text: texts.join('\n\n').trim(),
      metadata: {
        title: info.Title || '',
        authors: info.Author ? [info.Author] : [],
        // NOTE(review): Producer presumably names the software that generated
        // the PDF rather than a book publisher — confirm this mapping is intended.
        publisher: info.Producer || '',
        language: info.Language || '',
        // NOTE(review): passed through unparsed; PDF dates usually look like
        // "D:YYYYMMDDHHmmSS..." — verify consumers can handle that format.
        publishedDate: info.CreationDate || '',
        description: '',
        identifiers: info.Identifier ? [{ identifier: info.Identifier }] : [],
        filename: file.name,
        fileSize: file.size,
        pageCount,
      },
    };
  } finally {
    // Release the document and its worker-side resources on success and on failure.
    await loadingTask.destroy();
  }
};