PDF ekleme ve metin düzenleme eklendi
This commit is contained in:
@@ -17,6 +17,7 @@
|
||||
"@mui/material": "^6.1.1",
|
||||
"@supabase/supabase-js": "^2.81.1",
|
||||
"jszip": "^3.10.1",
|
||||
"pdfjs-dist": "^5.4.394",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-dropzone": "^14.2.3",
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
Button,
|
||||
LinearProgress,
|
||||
Stack,
|
||||
TextField,
|
||||
Typography,
|
||||
} from '@mui/material';
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
@@ -22,9 +23,11 @@ const OcrStep = () => {
|
||||
const clearTranslation = useAppStore((state) => state.clearTranslation);
|
||||
const bookMetadata = useAppStore((state) => state.bookMetadata);
|
||||
const epubImports = useAppStore((state) => state.epubImports);
|
||||
const pdfImports = useAppStore((state) => state.pdfImports);
|
||||
const [status, setStatus] = useState('idle');
|
||||
const [currentIndex, setCurrentIndex] = useState(0);
|
||||
const [previewText, setPreviewText] = useState('');
|
||||
const [isEditing, setIsEditing] = useState(false);
|
||||
const total = croppedImages.length;
|
||||
const abortRef = useRef(false);
|
||||
|
||||
@@ -39,6 +42,12 @@ const OcrStep = () => {
|
||||
const [workerReady, setWorkerReady] = useState(false);
|
||||
const previewRef = useRef(null);
|
||||
// removed auto navigation to translation
|
||||
// Change handler for the editable preview field: while edit mode is on, the
// typed text is written to the local preview state and also passed to
// setOcrText. Change events arriving outside edit mode are ignored.
const handlePreviewChange = (event) => {
  if (isEditing) {
    const nextText = event.target.value;
    setPreviewText(nextText);
    setOcrText(nextText);
  }
};
|
||||
|
||||
const orderedImages = useMemo(
|
||||
() => [...croppedImages].sort((a, b) => (a.order ?? 0) - (b.order ?? 0)),
|
||||
@@ -119,13 +128,15 @@ const OcrStep = () => {
|
||||
}, [assetBase, isDev, orderedImages.length, setError]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!orderedImages.length) return;
|
||||
if (!orderedImages.length && !(epubImports.length || pdfImports.length)) return;
|
||||
setStatus('idle');
|
||||
setCurrentIndex(0);
|
||||
setPreviewText('');
|
||||
setPreviewText(ocrText || '');
|
||||
if (!ocrText) {
|
||||
setOcrText('');
|
||||
}
|
||||
clearTranslation();
|
||||
}, [clearTranslation, orderedImages, setOcrText]);
|
||||
}, [clearTranslation, epubImports.length, orderedImages, pdfImports.length, ocrText, setOcrText]);
|
||||
|
||||
useEffect(() => {
|
||||
if (previewRef.current) {
|
||||
@@ -176,7 +187,7 @@ const OcrStep = () => {
|
||||
}, [orderedImages, setError, setOcrText, status, total, workerReady]);
|
||||
|
||||
|
||||
if (!orderedImages.length && !epubImports.length) {
|
||||
if (!orderedImages.length && !epubImports.length && !pdfImports.length) {
|
||||
return (
|
||||
<Stack spacing={2}>
|
||||
<Alert severity="info">Önce görselleri cropla.</Alert>
|
||||
@@ -187,7 +198,8 @@ const OcrStep = () => {
|
||||
);
|
||||
}
|
||||
|
||||
if (!orderedImages.length && epubImports.length) {
|
||||
if (!orderedImages.length && (epubImports.length || pdfImports.length)) {
|
||||
const documentCount = epubImports.length + pdfImports.length;
|
||||
return (
|
||||
<Stack spacing={4}>
|
||||
{bookMetadata && (
|
||||
@@ -197,15 +209,34 @@ const OcrStep = () => {
|
||||
</Typography>
|
||||
)}
|
||||
<Box textAlign="center">
|
||||
<Typography variant="h5">EPUB metni ayrıştırıldı</Typography>
|
||||
<Typography variant="h5">Belge metni ayrıştırıldı</Typography>
|
||||
<Typography color="text.secondary">
|
||||
{epubImports.length > 1
|
||||
? `${epubImports.length} EPUB dosyasından metin çıkarıldı.`
|
||||
: 'EPUB dosyasındaki metin çıkarıldı.'}
|
||||
{documentCount > 1
|
||||
? `${documentCount} belge dosyasından metin çıkarıldı.`
|
||||
: 'Yüklediğin belgedeki metin çıkarıldı.'}
|
||||
</Typography>
|
||||
</Box>
|
||||
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
|
||||
<Stack direction="row" alignItems="center" justifyContent="space-between">
|
||||
<Typography variant="subtitle1">Ön izleme</Typography>
|
||||
<Button
|
||||
size="small"
|
||||
variant={isEditing ? 'contained' : 'outlined'}
|
||||
onClick={() => setIsEditing((prev) => !prev)}
|
||||
>
|
||||
{isEditing ? 'Görünüme geç' : 'Metni düzenle'}
|
||||
</Button>
|
||||
</Stack>
|
||||
{isEditing ? (
|
||||
<TextField
|
||||
fullWidth
|
||||
multiline
|
||||
minRows={6}
|
||||
value={previewText}
|
||||
onChange={handlePreviewChange}
|
||||
sx={{ mt: 1 }}
|
||||
/>
|
||||
) : (
|
||||
<Box
|
||||
ref={previewRef}
|
||||
sx={{
|
||||
@@ -219,8 +250,9 @@ const OcrStep = () => {
|
||||
pr: 1,
|
||||
}}
|
||||
>
|
||||
{ocrText?.trim() || 'Metin ayrıştırılıyor...'}
|
||||
{previewText || ocrText?.trim() || 'Metin ayrıştırılıyor...'}
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
|
||||
<Button variant="contained" onClick={() => navigate('/')}>
|
||||
@@ -272,7 +304,26 @@ const OcrStep = () => {
|
||||
</Typography>
|
||||
</Box>
|
||||
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
|
||||
<Stack direction="row" alignItems="center" justifyContent="space-between">
|
||||
<Typography variant="subtitle1">Ön izleme</Typography>
|
||||
<Button
|
||||
size="small"
|
||||
variant={isEditing ? 'contained' : 'outlined'}
|
||||
onClick={() => setIsEditing((prev) => !prev)}
|
||||
>
|
||||
{isEditing ? 'Görünüme geç' : 'Metni düzenle'}
|
||||
</Button>
|
||||
</Stack>
|
||||
{isEditing ? (
|
||||
<TextField
|
||||
fullWidth
|
||||
multiline
|
||||
minRows={6}
|
||||
value={previewText}
|
||||
onChange={handlePreviewChange}
|
||||
sx={{ mt: 1 }}
|
||||
/>
|
||||
) : (
|
||||
<Box
|
||||
ref={previewRef}
|
||||
sx={{
|
||||
@@ -288,6 +339,7 @@ const OcrStep = () => {
|
||||
>
|
||||
{previewText || 'Metin bekleniyor'}
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
|
||||
<Button variant="contained" onClick={() => navigate('/bulk-crop')}>
|
||||
|
||||
@@ -19,6 +19,7 @@ import {
|
||||
import { useNavigate } from 'react-router-dom';
|
||||
import { useAppStore } from '../store/useAppStore';
|
||||
import { extractTextFromEpub } from '../utils/epubImport';
|
||||
import { extractTextFromPdf } from '../utils/pdfImport';
|
||||
|
||||
const dropzoneStyle = {
|
||||
border: '2px dashed rgba(108, 155, 207, 0.7)',
|
||||
@@ -42,6 +43,8 @@ const UploadStep = () => {
|
||||
const setBookMetadata = useAppStore((state) => state.setBookMetadata);
|
||||
const epubImports = useAppStore((state) => state.epubImports);
|
||||
const setEpubImports = useAppStore((state) => state.setEpubImports);
|
||||
const pdfImports = useAppStore((state) => state.pdfImports);
|
||||
const setPdfImports = useAppStore((state) => state.setPdfImports);
|
||||
const setOcrText = useAppStore((state) => state.setOcrText);
|
||||
const clearTranslation = useAppStore((state) => state.clearTranslation);
|
||||
const setError = useAppStore((state) => state.setError);
|
||||
@@ -51,12 +54,12 @@ const UploadStep = () => {
|
||||
const [selectedBookId, setSelectedBookId] = useState(bookMetadata?.id || null);
|
||||
const skipSearchRef = useRef(false);
|
||||
const [showResults, setShowResults] = useState(false);
|
||||
const [epubProcessing, setEpubProcessing] = useState(false);
|
||||
const [docProcessing, setDocProcessing] = useState(false);
|
||||
|
||||
const onDrop = useCallback(
|
||||
async (acceptedFiles) => {
|
||||
if (!acceptedFiles.length) return;
|
||||
setEpubProcessing(true);
|
||||
setDocProcessing(true);
|
||||
const preservedMetadata = bookMetadata;
|
||||
const preservedTitle = bookTitle;
|
||||
resetFromStep('upload');
|
||||
@@ -71,11 +74,19 @@ const UploadStep = () => {
|
||||
|
||||
const imageFiles = [];
|
||||
const epubFiles = [];
|
||||
const pdfFiles = [];
|
||||
acceptedFiles.forEach((file) => {
|
||||
const lowerName = file.name?.toLowerCase() || '';
|
||||
const isEpub =
|
||||
file.type === 'application/epub+zip' || file.name?.toLowerCase().endsWith('.epub');
|
||||
file.type === 'application/epub+zip' ||
|
||||
file.type === 'application/zip' ||
|
||||
file.type === 'application/x-zip-compressed' ||
|
||||
lowerName.endsWith('.epub');
|
||||
const isPdf = file.type === 'application/pdf' || lowerName.endsWith('.pdf');
|
||||
if (isEpub) {
|
||||
epubFiles.push(file);
|
||||
} else if (isPdf) {
|
||||
pdfFiles.push(file);
|
||||
} else {
|
||||
imageFiles.push(file);
|
||||
}
|
||||
@@ -92,13 +103,13 @@ const UploadStep = () => {
|
||||
setUploadedImages([...uploadedImages, ...mapped]);
|
||||
}
|
||||
|
||||
const importedEntries = [];
|
||||
const importedEpubEntries = [];
|
||||
if (epubFiles.length) {
|
||||
for (const file of epubFiles) {
|
||||
try {
|
||||
// eslint-disable-next-line no-await-in-loop
|
||||
const parsed = await extractTextFromEpub(file);
|
||||
importedEntries.push({
|
||||
importedEpubEntries.push({
|
||||
id: crypto.randomUUID(),
|
||||
filename: file.name,
|
||||
size: file.size,
|
||||
@@ -109,23 +120,54 @@ const UploadStep = () => {
|
||||
setError(error.message || `${file.name} okunamadı.`);
|
||||
}
|
||||
}
|
||||
setEpubImports(importedEntries);
|
||||
setEpubImports(importedEpubEntries);
|
||||
} else {
|
||||
setEpubImports([]);
|
||||
}
|
||||
|
||||
const importedPdfEntries = [];
|
||||
if (pdfFiles.length) {
|
||||
for (const file of pdfFiles) {
|
||||
try {
|
||||
// eslint-disable-next-line no-await-in-loop
|
||||
const parsed = await extractTextFromPdf(file);
|
||||
importedPdfEntries.push({
|
||||
id: crypto.randomUUID(),
|
||||
filename: file.name,
|
||||
size: file.size,
|
||||
text: parsed.text,
|
||||
metadata: parsed.metadata,
|
||||
});
|
||||
} catch (error) {
|
||||
setError(error.message || `${file.name} okunamadı.`);
|
||||
}
|
||||
}
|
||||
setPdfImports(importedPdfEntries);
|
||||
} else {
|
||||
setPdfImports([]);
|
||||
}
|
||||
|
||||
if (importedEpubEntries.length || importedPdfEntries.length) {
|
||||
clearTranslation();
|
||||
const combinedText = importedEntries.map((entry) => entry.text).filter(Boolean).join('\n\n');
|
||||
const combinedText = [...importedEpubEntries, ...importedPdfEntries]
|
||||
.map((entry) => entry.text)
|
||||
.filter(Boolean)
|
||||
.join('\n\n');
|
||||
if (combinedText) {
|
||||
setOcrText(combinedText);
|
||||
}
|
||||
if (!preservedMetadata && importedEntries[0]?.metadata) {
|
||||
const meta = importedEntries[0].metadata;
|
||||
const firstMeta = importedEpubEntries[0]?.metadata || importedPdfEntries[0]?.metadata;
|
||||
if (!preservedMetadata && firstMeta) {
|
||||
const meta = firstMeta;
|
||||
setBookMetadata({
|
||||
id: `epub-${crypto.randomUUID()}`,
|
||||
title: meta.title || bookTitle || 'İsimsiz EPUB',
|
||||
id: `doc-${crypto.randomUUID()}`,
|
||||
title: meta.title || bookTitle || 'İsimsiz belge',
|
||||
subtitle: '',
|
||||
authors: meta.authors || [],
|
||||
publisher: meta.publisher || '',
|
||||
publishedDate: meta.publishedDate || '',
|
||||
description: meta.description || '',
|
||||
pageCount: null,
|
||||
pageCount: meta.pageCount || null,
|
||||
categories: meta.categories || [],
|
||||
averageRating: null,
|
||||
ratingsCount: null,
|
||||
@@ -139,10 +181,10 @@ const UploadStep = () => {
|
||||
setBookTitle(meta.title);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
setEpubImports([]);
|
||||
} else if (!preservedMetadata) {
|
||||
setBookMetadata(null);
|
||||
}
|
||||
setEpubProcessing(false);
|
||||
setDocProcessing(false);
|
||||
},
|
||||
[
|
||||
bookMetadata,
|
||||
@@ -152,6 +194,7 @@ const UploadStep = () => {
|
||||
setBookMetadata,
|
||||
setBookTitle,
|
||||
setEpubImports,
|
||||
setPdfImports,
|
||||
setError,
|
||||
setOcrText,
|
||||
setUploadedImages,
|
||||
@@ -259,6 +302,7 @@ const UploadStep = () => {
|
||||
'application/zip': ['.epub'],
|
||||
'application/x-zip-compressed': ['.epub'],
|
||||
'application/octet-stream': ['.epub'],
|
||||
'application/pdf': ['.pdf'],
|
||||
},
|
||||
multiple: true,
|
||||
});
|
||||
@@ -303,8 +347,8 @@ const UploadStep = () => {
|
||||
}, [bookMetadata]);
|
||||
|
||||
const hasImages = uploadedImages.length > 0;
|
||||
const hasEpubImports = epubImports.length > 0;
|
||||
const canProceed = hasImages || hasEpubImports;
|
||||
const hasDocumentImports = epubImports.length > 0 || pdfImports.length > 0;
|
||||
const canProceed = hasImages || hasDocumentImports;
|
||||
const nextPath = hasImages ? '/crop' : '/ocr';
|
||||
|
||||
return (
|
||||
@@ -466,10 +510,10 @@ const UploadStep = () => {
|
||||
<Box {...getRootProps()} sx={dropzoneStyle}>
|
||||
<input {...getInputProps()} />
|
||||
<Typography variant="h5" gutterBottom>
|
||||
Görselleri veya EPUB dosyasını sürükleyip bırak ya da tıkla
|
||||
Görselleri, EPUB veya PDF dosyalarını sürükleyip bırak ya da tıkla
|
||||
</Typography>
|
||||
<Typography color="text.secondary" gutterBottom>
|
||||
.png, .jpg, .jpeg formatlarında çoklu görsel ya da .epub dosyaları yükleyebilirsin.
|
||||
.png, .jpg, .jpeg, .webp görsellerinin yanı sıra .epub ve .pdf belgeleri yükleyebilirsin.
|
||||
</Typography>
|
||||
<Button variant="contained" color="primary">
|
||||
Dosya seç
|
||||
@@ -479,9 +523,9 @@ const UploadStep = () => {
|
||||
Bırak ve yükleyelim!
|
||||
</Typography>
|
||||
)}
|
||||
{epubProcessing && (
|
||||
{docProcessing && (
|
||||
<Typography mt={2} color="text.secondary">
|
||||
EPUB içeriği ayrıştırılıyor...
|
||||
Belge içeriği ayrıştırılıyor...
|
||||
</Typography>
|
||||
)}
|
||||
</Box>
|
||||
@@ -524,6 +568,44 @@ const UploadStep = () => {
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{pdfImports.length > 0 && (
|
||||
<Box>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
Yüklenen PDF dosyaları ({pdfImports.length})
|
||||
</Typography>
|
||||
<Stack spacing={1.5}>
|
||||
{pdfImports.map((item) => (
|
||||
<Paper key={item.id} variant="outlined" sx={{ p: 2, borderRadius: 2 }}>
|
||||
<Typography variant="subtitle1" sx={{ fontWeight: 600 }}>
|
||||
{item.metadata?.title || item.filename}
|
||||
</Typography>
|
||||
{item.metadata?.authors?.length ? (
|
||||
<Typography variant="body2" color="text.secondary" sx={{ fontStyle: 'italic' }}>
|
||||
{item.metadata.authors.join(', ')}
|
||||
</Typography>
|
||||
) : (
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
Yazar bilgisi bulunamadı
|
||||
</Typography>
|
||||
)}
|
||||
<Typography variant="caption" color="text.secondary" display="block" mt={0.5}>
|
||||
{[
|
||||
item.metadata?.publisher,
|
||||
item.metadata?.language ? item.metadata.language.toUpperCase() : null,
|
||||
item.metadata?.pageCount ? `${item.metadata.pageCount} sayfa` : null,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(' • ')}
|
||||
</Typography>
|
||||
<Typography variant="caption" color="text.secondary" display="block">
|
||||
{item.filename} • {(item.size / (1024 * 1024)).toFixed(2)} MB
|
||||
</Typography>
|
||||
</Paper>
|
||||
))}
|
||||
</Stack>
|
||||
</Box>
|
||||
)}
|
||||
|
||||
<Box>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
Yüklenen görseller ({uploadedImages.length})
|
||||
|
||||
@@ -45,6 +45,7 @@ export const useAppStore = create((set) => ({
|
||||
croppedCoverImage: null,
|
||||
ocrText: '',
|
||||
epubImports: [],
|
||||
pdfImports: [],
|
||||
bookTitle: '',
|
||||
bookMetadata: null,
|
||||
translatedText: '',
|
||||
@@ -76,6 +77,7 @@ export const useAppStore = create((set) => ({
|
||||
}),
|
||||
setOcrText: (text) => set({ ocrText: text }),
|
||||
setEpubImports: (imports) => set({ epubImports: imports }),
|
||||
setPdfImports: (imports) => set({ pdfImports: imports }),
|
||||
setBookTitle: (title) => set({ bookTitle: title }),
|
||||
setBookMetadata: (metadata) => set({ bookMetadata: metadata }),
|
||||
setTranslatedText: (text) => set({ translatedText: text }),
|
||||
@@ -154,6 +156,7 @@ export const useAppStore = create((set) => ({
|
||||
draft.croppedCoverImage = null;
|
||||
draft.ocrText = '';
|
||||
draft.epubImports = [];
|
||||
draft.pdfImports = [];
|
||||
draft.bookTitle = '';
|
||||
draft.bookMetadata = null;
|
||||
draft.translatedText = '';
|
||||
|
||||
61
src/utils/pdfImport.js
Normal file
61
src/utils/pdfImport.js
Normal file
@@ -0,0 +1,61 @@
|
||||
import { GlobalWorkerOptions, getDocument } from 'pdfjs-dist/build/pdf.mjs';
|
||||
import pdfWorker from 'pdfjs-dist/build/pdf.worker.mjs?url';
|
||||
|
||||
// Tell pdf.js where to load its worker script from, using the value imported
// above with the `?url` suffix (presumably resolved to an asset URL by the
// bundler — confirm against the build setup). Runs once at module load.
GlobalWorkerOptions.workerSrc = pdfWorker;
|
||||
|
||||
/**
 * Normalises whitespace in extracted text.
 *
 * Steps, in order: drop carriage returns, turn tabs into spaces, strip
 * trailing horizontal whitespace in front of a newline, cap consecutive
 * newlines at two (one blank line), collapse runs of spaces / non-breaking
 * spaces into a single space, and trim both ends.
 *
 * @param {string} [value=''] Raw text; `null`/`undefined` are treated as ''.
 * @returns {string} The cleaned text.
 */
const cleanText = (value = '') =>
  String(value ?? '')
    .replace(/\r/g, '')
    .replace(/\t/g, ' ')
    // Match horizontal whitespace only: `\s+\n` would also swallow newlines,
    // flattening paragraph breaks and making the next step unreachable.
    .replace(/[^\S\n]+\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .replace(/[ \u00A0]{2,}/g, ' ')
    .trim();
|
||||
|
||||
/**
 * Collects the text of a single PDF page as one cleaned string.
 *
 * Each text-content item contributes its `str` (or, when it has no `str`
 * property, its `unicode`) value; items without a truthy value are dropped.
 * Whitespace inside every fragment is collapsed and trimmed, the fragments
 * are joined with single spaces, and the result is passed through cleanText.
 *
 * @param {object} page Page object exposing an async `getTextContent()`.
 * @returns {Promise<string>} The page text.
 */
const extractPageText = async (page) => {
  const { items } = await page.getTextContent();
  const fragments = [];
  for (const entry of items) {
    const raw = 'str' in entry ? entry.str : entry?.unicode;
    if (!raw) continue;
    fragments.push(raw.replace(/\s+/g, ' ').trim());
  }
  return cleanText(fragments.join(' '));
};
|
||||
|
||||
/**
 * Extracts the plain text and basic metadata of a PDF file.
 *
 * Pages are read sequentially; pages that yield no text are skipped and the
 * remaining page texts are joined with a blank line between them. Metadata
 * is best-effort: if `getMetadata()` fails, all info-derived fields fall back
 * to empty values.
 *
 * The pdf.js loading task is always destroyed before returning or throwing,
 * so the parsed document does not stay alive after the import.
 *
 * @param {File|Blob} file Object exposing `arrayBuffer()`, `name` and `size`.
 * @returns {Promise<{text: string, metadata: object}>} Extracted text plus
 *   title/authors/publisher/language/publishedDate/description/identifiers/
 *   filename/fileSize/pageCount.
 * @throws Propagates errors from reading the file, opening the document or
 *   extracting page text.
 */
export const extractTextFromPdf = async (file) => {
  const arrayBuffer = await file.arrayBuffer();
  const loadingTask = getDocument({ data: arrayBuffer });
  try {
    const pdf = await loadingTask.promise;
    const texts = [];
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber += 1) {
      // eslint-disable-next-line no-await-in-loop
      const page = await pdf.getPage(pageNumber);
      // eslint-disable-next-line no-await-in-loop
      const text = await extractPageText(page);
      if (text) {
        texts.push(text);
      }
    }

    let info = {};
    try {
      // pdf.getMetadata() resolves with info + metadata.
      const metadata = await pdf.getMetadata();
      info = metadata?.info || {};
    } catch {
      // Metadata is optional; a failure here must not abort the text import.
      info = {};
    }

    return {
      text: texts.join('\n\n').trim(),
      metadata: {
        title: info.Title || '',
        authors: info.Author ? [info.Author] : [],
        // NOTE(review): Producer is usually the generating software rather
        // than a publisher — confirm this mapping is intended.
        publisher: info.Producer || '',
        language: info.Language || '',
        // NOTE(review): passed through unparsed; PDF dates typically look
        // like "D:YYYYMMDDHHmmSS" — confirm consumers expect the raw value.
        publishedDate: info.CreationDate || '',
        description: '',
        identifiers: info.Identifier ? [{ identifier: info.Identifier }] : [],
        filename: file.name,
        fileSize: file.size,
        pageCount: pdf.numPages,
      },
    };
  } finally {
    // Release the document and its worker-side resources on every path.
    await loadingTask.destroy();
  }
};
|
||||
Reference in New Issue
Block a user