361 lines
12 KiB
JavaScript
361 lines
12 KiB
JavaScript
import { useEffect, useMemo, useRef, useState } from 'react';
|
||
import {
|
||
Alert,
|
||
Box,
|
||
Button,
|
||
LinearProgress,
|
||
Stack,
|
||
TextField,
|
||
Typography,
|
||
} from '@mui/material';
|
||
import { useNavigate } from 'react-router-dom';
|
||
import Tesseract from 'tesseract.js';
|
||
import { useAppStore } from '../store/useAppStore';
|
||
import { correctTurkishCharacters } from '../utils/ocrUtils';
|
||
|
||
const OcrStep = () => {
|
||
const navigate = useNavigate();
|
||
const isDev = import.meta.env.DEV;
|
||
const croppedImages = useAppStore((state) => state.croppedImages);
|
||
const ocrText = useAppStore((state) => state.ocrText);
|
||
const setOcrText = useAppStore((state) => state.setOcrText);
|
||
const setError = useAppStore((state) => state.setError);
|
||
const clearTranslation = useAppStore((state) => state.clearTranslation);
|
||
const bookMetadata = useAppStore((state) => state.bookMetadata);
|
||
const epubImports = useAppStore((state) => state.epubImports);
|
||
const pdfImports = useAppStore((state) => state.pdfImports);
|
||
const [status, setStatus] = useState('idle');
|
||
const [currentIndex, setCurrentIndex] = useState(0);
|
||
const [previewText, setPreviewText] = useState('');
|
||
const [isEditing, setIsEditing] = useState(false);
|
||
const total = croppedImages.length;
|
||
const abortRef = useRef(false);
|
||
|
||
const assetBase = useMemo(() => {
|
||
const rawBase = import.meta.env.BASE_URL ?? '/';
|
||
if (rawBase === '.' || rawBase === './' || rawBase === '/') {
|
||
return '';
|
||
}
|
||
return rawBase.endsWith('/') ? rawBase.slice(0, -1) : rawBase;
|
||
}, []);
|
||
const workerRef = useRef(null);
|
||
const [workerReady, setWorkerReady] = useState(false);
|
||
const previewRef = useRef(null);
|
||
// removed auto navigation to translation
|
||
const handlePreviewChange = (event) => {
|
||
if (!isEditing) return;
|
||
const value = event.target.value;
|
||
setPreviewText(value);
|
||
setOcrText(value);
|
||
};
|
||
|
||
const orderedImages = useMemo(
|
||
() => [...croppedImages].sort((a, b) => (a.order ?? 0) - (b.order ?? 0)),
|
||
[croppedImages],
|
||
);
|
||
|
||
useEffect(() => {
|
||
if (!orderedImages.length) return undefined;
|
||
let cancelled = false;
|
||
const origin =
|
||
typeof window !== 'undefined' ? window.location.origin : '';
|
||
const prefix = `${origin}${assetBase}`;
|
||
const paths = {
|
||
workerPath: `${prefix}/tesseract/worker.min.js`,
|
||
corePath: `${prefix}/tesseract/tesseract-core-simd-lstm.wasm.js`,
|
||
langPath: `${prefix}/tesseract`,
|
||
};
|
||
|
||
const initWorker = async () => {
|
||
setWorkerReady(false);
|
||
try {
|
||
const workerOptions = {
|
||
workerPath: paths.workerPath,
|
||
corePath: paths.corePath,
|
||
langPath: paths.langPath,
|
||
};
|
||
if (isDev) {
|
||
workerOptions.logger = (m) => console.log('Tesseract:', m);
|
||
}
|
||
const worker = await Tesseract.createWorker(
|
||
'tur', // Dil doğrudan belirt
|
||
1, // OEM level (LSTM)
|
||
workerOptions,
|
||
);
|
||
|
||
// Türkçe karakter tanımını iyileştir
|
||
await worker.setParameters({
|
||
tessedit_char_whitelist: 'abcçdefgğhıijklmnoöprsştuüvyzâîûABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ0123456789 .,;:!?\'"-_',
|
||
tessedit_pageseg_mode: '6', // Tek bir metin bloğu varsay
|
||
preserve_interword_spaces: '1',
|
||
});
|
||
if (cancelled) {
|
||
await worker.terminate();
|
||
return;
|
||
}
|
||
// Dil ve worker zaten createWorker sırasında yüklendi
|
||
if (isDev) {
|
||
console.log('Tesseract worker başarıyla oluşturuldu');
|
||
}
|
||
workerRef.current = worker;
|
||
setWorkerReady(true);
|
||
} catch (error) {
|
||
console.error('Tesseract başlatma hatası:', error);
|
||
let errorMessage;
|
||
|
||
if (error.message.includes('traineddata')) {
|
||
errorMessage = 'Tesseract dil dosyaları bulunamadı. Lütfen tarayıcı cache\'ini temizleyip sayfayı yenileyin.';
|
||
} else if (error.message.includes('TESSDATA_PREFIX')) {
|
||
errorMessage = 'Tesseract yapılandırma hatası: Lütfen sayfayı yenileyin.';
|
||
} else {
|
||
errorMessage = `Tesseract başlatılamadı: ${error.message}`;
|
||
}
|
||
|
||
setError(errorMessage);
|
||
setWorkerReady(false);
|
||
}
|
||
};
|
||
|
||
initWorker();
|
||
return () => {
|
||
cancelled = true;
|
||
if (workerRef.current) {
|
||
workerRef.current.terminate();
|
||
workerRef.current = null;
|
||
setWorkerReady(false);
|
||
}
|
||
};
|
||
}, [assetBase, isDev, orderedImages.length, setError]);
|
||
|
||
useEffect(() => {
|
||
if (!orderedImages.length && !(epubImports.length || pdfImports.length)) return;
|
||
setStatus('idle');
|
||
setCurrentIndex(0);
|
||
setPreviewText(ocrText || '');
|
||
if (!ocrText) {
|
||
setOcrText('');
|
||
}
|
||
clearTranslation();
|
||
}, [clearTranslation, epubImports.length, orderedImages, pdfImports.length, ocrText, setOcrText]);
|
||
|
||
useEffect(() => {
|
||
if (previewRef.current) {
|
||
previewRef.current.scrollTop = previewRef.current.scrollHeight;
|
||
}
|
||
}, [previewText]);
|
||
useEffect(() => {
|
||
if (!total || status === 'done' || !workerReady) return;
|
||
abortRef.current = false;
|
||
const run = async () => {
|
||
setStatus('running');
|
||
setCurrentIndex(0);
|
||
const worker = workerRef.current;
|
||
if (!worker) return;
|
||
try {
|
||
let combinedText = '';
|
||
setOcrText('');
|
||
setPreviewText('');
|
||
for (let index = 0; index < orderedImages.length; index += 1) {
|
||
if (abortRef.current) break;
|
||
const image = orderedImages[index];
|
||
setCurrentIndex(index + 1);
|
||
// eslint-disable-next-line no-await-in-loop
|
||
const { data } = await worker.recognize(image.blob);
|
||
const correctedText = correctTurkishCharacters(data.text || '');
|
||
if (correctedText) {
|
||
combinedText = combinedText
|
||
? `${combinedText}\n\n${correctedText}`
|
||
: correctedText;
|
||
setPreviewText(combinedText);
|
||
}
|
||
}
|
||
if (!abortRef.current) {
|
||
setOcrText(combinedText);
|
||
setStatus('done');
|
||
}
|
||
} catch (error) {
|
||
if (!abortRef.current) {
|
||
setError(error.message);
|
||
setStatus('idle');
|
||
}
|
||
}
|
||
};
|
||
run();
|
||
return () => {
|
||
abortRef.current = true;
|
||
};
|
||
}, [orderedImages, setError, setOcrText, status, total, workerReady]);
|
||
|
||
|
||
if (!orderedImages.length && !epubImports.length && !pdfImports.length) {
|
||
return (
|
||
<Stack spacing={2}>
|
||
<Alert severity="info">Önce görselleri cropla.</Alert>
|
||
<Button variant="contained" onClick={() => navigate('/bulk-crop')}>
|
||
Toplu Crop adımına dön
|
||
</Button>
|
||
</Stack>
|
||
);
|
||
}
|
||
|
||
if (!orderedImages.length && (epubImports.length || pdfImports.length)) {
|
||
const documentCount = epubImports.length + pdfImports.length;
|
||
return (
|
||
<Stack spacing={4}>
|
||
{bookMetadata && (
|
||
<Typography variant="body2" color="success.main">
|
||
Seçilen kitap: <strong>{bookMetadata.title}</strong>
|
||
{bookMetadata.authors?.length ? ` • ${bookMetadata.authors.join(', ')}` : ''}
|
||
</Typography>
|
||
)}
|
||
<Box textAlign="center">
|
||
<Typography variant="h5">Belge metni ayrıştırıldı</Typography>
|
||
<Typography color="text.secondary">
|
||
{documentCount > 1
|
||
? `${documentCount} belge dosyasından metin çıkarıldı.`
|
||
: 'Yüklediğin belgedeki metin çıkarıldı.'}
|
||
</Typography>
|
||
</Box>
|
||
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
|
||
<Stack direction="row" alignItems="center" justifyContent="space-between">
|
||
<Typography variant="subtitle1">Ön izleme</Typography>
|
||
<Button
|
||
size="small"
|
||
variant={isEditing ? 'contained' : 'outlined'}
|
||
onClick={() => setIsEditing((prev) => !prev)}
|
||
>
|
||
{isEditing ? 'Görünüme geç' : 'Metni düzenle'}
|
||
</Button>
|
||
</Stack>
|
||
{isEditing ? (
|
||
<TextField
|
||
fullWidth
|
||
multiline
|
||
minRows={6}
|
||
value={previewText}
|
||
onChange={handlePreviewChange}
|
||
sx={{ mt: 1 }}
|
||
/>
|
||
) : (
|
||
<Box
|
||
ref={previewRef}
|
||
sx={{
|
||
mt: 1,
|
||
maxHeight: '10em',
|
||
overflowY: 'auto',
|
||
whiteSpace: 'pre-wrap',
|
||
lineHeight: 1.5,
|
||
fontSize: '0.95rem',
|
||
color: 'text.secondary',
|
||
pr: 1,
|
||
}}
|
||
>
|
||
{previewText || ocrText?.trim() || 'Metin ayrıştırılıyor...'}
|
||
</Box>
|
||
)}
|
||
</Box>
|
||
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
|
||
<Button variant="contained" onClick={() => navigate('/')}>
|
||
Yükleme adımına dön
|
||
</Button>
|
||
<Button
|
||
variant="contained"
|
||
onClick={() => navigate('/translate')}
|
||
disabled={!ocrText?.trim()}
|
||
>
|
||
Çeviri adımına geç
|
||
</Button>
|
||
</Stack>
|
||
</Stack>
|
||
);
|
||
}
|
||
|
||
const progressValue =
|
||
workerReady && total ? (currentIndex / total) * 100 : 0;
|
||
const progressVariant = workerReady ? 'determinate' : 'indeterminate';
|
||
const progressText = !workerReady
|
||
? 'OCR işçisi hazırlanıyor...'
|
||
: status === 'done'
|
||
? 'OCR işlemi tamamlandı.'
|
||
: `Şu an ${currentIndex}/${total} resim işleniyor`;
|
||
|
||
return (
|
||
<Stack spacing={4}>
|
||
{bookMetadata && (
|
||
<Typography variant="body2" color="success.main">
|
||
Seçilen kitap: <strong>{bookMetadata.title}</strong>
|
||
{bookMetadata.authors?.length ? ` • ${bookMetadata.authors.join(', ')}` : ''}
|
||
</Typography>
|
||
)}
|
||
<Box textAlign="center">
|
||
<Typography variant="h5">OCR işlemi</Typography>
|
||
<Typography color="text.secondary">
|
||
Tüm görseller sırayla işleniyor. Bu adım biraz sürebilir.
|
||
</Typography>
|
||
</Box>
|
||
<Box>
|
||
<LinearProgress
|
||
variant={progressVariant}
|
||
value={progressVariant === 'determinate' ? progressValue : undefined}
|
||
sx={{ height: 10, borderRadius: 5 }}
|
||
/>
|
||
<Typography mt={2} align="center">
|
||
{progressText}
|
||
</Typography>
|
||
</Box>
|
||
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
|
||
<Stack direction="row" alignItems="center" justifyContent="space-between">
|
||
<Typography variant="subtitle1">Ön izleme</Typography>
|
||
<Button
|
||
size="small"
|
||
variant={isEditing ? 'contained' : 'outlined'}
|
||
onClick={() => setIsEditing((prev) => !prev)}
|
||
>
|
||
{isEditing ? 'Görünüme geç' : 'Metni düzenle'}
|
||
</Button>
|
||
</Stack>
|
||
{isEditing ? (
|
||
<TextField
|
||
fullWidth
|
||
multiline
|
||
minRows={6}
|
||
value={previewText}
|
||
onChange={handlePreviewChange}
|
||
sx={{ mt: 1 }}
|
||
/>
|
||
) : (
|
||
<Box
|
||
ref={previewRef}
|
||
sx={{
|
||
mt: 1,
|
||
maxHeight: '8.5em',
|
||
overflowY: 'auto',
|
||
whiteSpace: 'pre-wrap',
|
||
lineHeight: 1.5,
|
||
fontSize: '0.95rem',
|
||
color: 'text.secondary',
|
||
pr: 1,
|
||
}}
|
||
>
|
||
{previewText || 'Metin bekleniyor'}
|
||
</Box>
|
||
)}
|
||
</Box>
|
||
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
|
||
<Button variant="contained" onClick={() => navigate('/bulk-crop')}>
|
||
Geri dön
|
||
</Button>
|
||
<Button
|
||
variant="contained"
|
||
onClick={() => navigate('/translate')}
|
||
disabled={status !== 'done'}
|
||
>
|
||
Çeviri adımına geç
|
||
</Button>
|
||
</Stack>
|
||
</Stack>
|
||
);
|
||
};
|
||
|
||
export default OcrStep;
|