Resim ile beraber epub da yükleme özelliği eklendi
This commit is contained in:
@@ -16,6 +16,7 @@
|
|||||||
"@mui/icons-material": "^6.1.1",
|
"@mui/icons-material": "^6.1.1",
|
||||||
"@mui/material": "^6.1.1",
|
"@mui/material": "^6.1.1",
|
||||||
"@supabase/supabase-js": "^2.81.1",
|
"@supabase/supabase-js": "^2.81.1",
|
||||||
|
"jszip": "^3.10.1",
|
||||||
"react": "^18.3.1",
|
"react": "^18.3.1",
|
||||||
"react-dom": "^18.3.1",
|
"react-dom": "^18.3.1",
|
||||||
"react-dropzone": "^14.2.3",
|
"react-dropzone": "^14.2.3",
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ const OcrStep = () => {
|
|||||||
const setError = useAppStore((state) => state.setError);
|
const setError = useAppStore((state) => state.setError);
|
||||||
const clearTranslation = useAppStore((state) => state.clearTranslation);
|
const clearTranslation = useAppStore((state) => state.clearTranslation);
|
||||||
const bookMetadata = useAppStore((state) => state.bookMetadata);
|
const bookMetadata = useAppStore((state) => state.bookMetadata);
|
||||||
|
const epubImports = useAppStore((state) => state.epubImports);
|
||||||
const [status, setStatus] = useState('idle');
|
const [status, setStatus] = useState('idle');
|
||||||
const [currentIndex, setCurrentIndex] = useState(0);
|
const [currentIndex, setCurrentIndex] = useState(0);
|
||||||
const [previewText, setPreviewText] = useState('');
|
const [previewText, setPreviewText] = useState('');
|
||||||
@@ -118,6 +119,7 @@ const OcrStep = () => {
|
|||||||
}, [assetBase, isDev, orderedImages.length, setError]);
|
}, [assetBase, isDev, orderedImages.length, setError]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
if (!orderedImages.length) return;
|
||||||
setStatus('idle');
|
setStatus('idle');
|
||||||
setCurrentIndex(0);
|
setCurrentIndex(0);
|
||||||
setPreviewText('');
|
setPreviewText('');
|
||||||
@@ -174,7 +176,7 @@ const OcrStep = () => {
|
|||||||
}, [orderedImages, setError, setOcrText, status, total, workerReady]);
|
}, [orderedImages, setError, setOcrText, status, total, workerReady]);
|
||||||
|
|
||||||
|
|
||||||
if (!orderedImages.length) {
|
if (!orderedImages.length && !epubImports.length) {
|
||||||
return (
|
return (
|
||||||
<Stack spacing={2}>
|
<Stack spacing={2}>
|
||||||
<Alert severity="info">Önce görselleri cropla.</Alert>
|
<Alert severity="info">Önce görselleri cropla.</Alert>
|
||||||
@@ -185,6 +187,57 @@ const OcrStep = () => {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!orderedImages.length && epubImports.length) {
|
||||||
|
return (
|
||||||
|
<Stack spacing={4}>
|
||||||
|
{bookMetadata && (
|
||||||
|
<Typography variant="body2" color="success.main">
|
||||||
|
Seçilen kitap: <strong>{bookMetadata.title}</strong>
|
||||||
|
{bookMetadata.authors?.length ? ` • ${bookMetadata.authors.join(', ')}` : ''}
|
||||||
|
</Typography>
|
||||||
|
)}
|
||||||
|
<Box textAlign="center">
|
||||||
|
<Typography variant="h5">EPUB metni ayrıştırıldı</Typography>
|
||||||
|
<Typography color="text.secondary">
|
||||||
|
{epubImports.length > 1
|
||||||
|
? `${epubImports.length} EPUB dosyasından metin çıkarıldı.`
|
||||||
|
: 'EPUB dosyasındaki metin çıkarıldı.'}
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
<Box sx={{ p: 2, borderRadius: 2, bgcolor: 'background.default' }}>
|
||||||
|
<Typography variant="subtitle1">Ön izleme</Typography>
|
||||||
|
<Box
|
||||||
|
ref={previewRef}
|
||||||
|
sx={{
|
||||||
|
mt: 1,
|
||||||
|
maxHeight: '10em',
|
||||||
|
overflowY: 'auto',
|
||||||
|
whiteSpace: 'pre-wrap',
|
||||||
|
lineHeight: 1.5,
|
||||||
|
fontSize: '0.95rem',
|
||||||
|
color: 'text.secondary',
|
||||||
|
pr: 1,
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{ocrText?.trim() || 'Metin ayrıştırılıyor...'}
|
||||||
|
</Box>
|
||||||
|
</Box>
|
||||||
|
<Stack direction={{ xs: 'column', sm: 'row' }} spacing={2} justifyContent="space-between">
|
||||||
|
<Button variant="contained" onClick={() => navigate('/')}>
|
||||||
|
Yükleme adımına dön
|
||||||
|
</Button>
|
||||||
|
<Button
|
||||||
|
variant="contained"
|
||||||
|
onClick={() => navigate('/translate')}
|
||||||
|
disabled={!ocrText?.trim()}
|
||||||
|
>
|
||||||
|
Çeviri adımına geç
|
||||||
|
</Button>
|
||||||
|
</Stack>
|
||||||
|
</Stack>
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const progressValue =
|
const progressValue =
|
||||||
workerReady && total ? (currentIndex / total) * 100 : 0;
|
workerReady && total ? (currentIndex / total) * 100 : 0;
|
||||||
const progressVariant = workerReady ? 'determinate' : 'indeterminate';
|
const progressVariant = workerReady ? 'determinate' : 'indeterminate';
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import {
|
|||||||
} from '@mui/material';
|
} from '@mui/material';
|
||||||
import { useNavigate } from 'react-router-dom';
|
import { useNavigate } from 'react-router-dom';
|
||||||
import { useAppStore } from '../store/useAppStore';
|
import { useAppStore } from '../store/useAppStore';
|
||||||
|
import { extractTextFromEpub } from '../utils/epubImport';
|
||||||
|
|
||||||
const dropzoneStyle = {
|
const dropzoneStyle = {
|
||||||
border: '2px dashed rgba(108, 155, 207, 0.7)',
|
border: '2px dashed rgba(108, 155, 207, 0.7)',
|
||||||
@@ -39,16 +40,23 @@ const UploadStep = () => {
|
|||||||
const setBookTitle = useAppStore((state) => state.setBookTitle);
|
const setBookTitle = useAppStore((state) => state.setBookTitle);
|
||||||
const bookMetadata = useAppStore((state) => state.bookMetadata);
|
const bookMetadata = useAppStore((state) => state.bookMetadata);
|
||||||
const setBookMetadata = useAppStore((state) => state.setBookMetadata);
|
const setBookMetadata = useAppStore((state) => state.setBookMetadata);
|
||||||
|
const epubImports = useAppStore((state) => state.epubImports);
|
||||||
|
const setEpubImports = useAppStore((state) => state.setEpubImports);
|
||||||
|
const setOcrText = useAppStore((state) => state.setOcrText);
|
||||||
|
const clearTranslation = useAppStore((state) => state.clearTranslation);
|
||||||
|
const setError = useAppStore((state) => state.setError);
|
||||||
const [searchResults, setSearchResults] = useState([]);
|
const [searchResults, setSearchResults] = useState([]);
|
||||||
const [searching, setSearching] = useState(false);
|
const [searching, setSearching] = useState(false);
|
||||||
const [searchError, setSearchError] = useState(null);
|
const [searchError, setSearchError] = useState(null);
|
||||||
const [selectedBookId, setSelectedBookId] = useState(bookMetadata?.id || null);
|
const [selectedBookId, setSelectedBookId] = useState(bookMetadata?.id || null);
|
||||||
const skipSearchRef = useRef(false);
|
const skipSearchRef = useRef(false);
|
||||||
const [showResults, setShowResults] = useState(false);
|
const [showResults, setShowResults] = useState(false);
|
||||||
|
const [epubProcessing, setEpubProcessing] = useState(false);
|
||||||
|
|
||||||
const onDrop = useCallback(
|
const onDrop = useCallback(
|
||||||
(acceptedFiles) => {
|
async (acceptedFiles) => {
|
||||||
if (!acceptedFiles.length) return;
|
if (!acceptedFiles.length) return;
|
||||||
|
setEpubProcessing(true);
|
||||||
const preservedMetadata = bookMetadata;
|
const preservedMetadata = bookMetadata;
|
||||||
const preservedTitle = bookTitle;
|
const preservedTitle = bookTitle;
|
||||||
resetFromStep('upload');
|
resetFromStep('upload');
|
||||||
@@ -60,21 +68,92 @@ const UploadStep = () => {
|
|||||||
skipSearchRef.current = true;
|
skipSearchRef.current = true;
|
||||||
setBookTitle(preservedTitle);
|
setBookTitle(preservedTitle);
|
||||||
}
|
}
|
||||||
const mapped = acceptedFiles.map((file, index) => ({
|
|
||||||
id: crypto.randomUUID(),
|
const imageFiles = [];
|
||||||
file,
|
const epubFiles = [];
|
||||||
previewUrl: URL.createObjectURL(file),
|
acceptedFiles.forEach((file) => {
|
||||||
order: uploadedImages.length + index,
|
const isEpub =
|
||||||
filename: file.name,
|
file.type === 'application/epub+zip' || file.name?.toLowerCase().endsWith('.epub');
|
||||||
}));
|
if (isEpub) {
|
||||||
setUploadedImages([...uploadedImages, ...mapped]);
|
epubFiles.push(file);
|
||||||
|
} else {
|
||||||
|
imageFiles.push(file);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (imageFiles.length) {
|
||||||
|
const mapped = imageFiles.map((file, index) => ({
|
||||||
|
id: crypto.randomUUID(),
|
||||||
|
file,
|
||||||
|
previewUrl: URL.createObjectURL(file),
|
||||||
|
order: uploadedImages.length + index,
|
||||||
|
filename: file.name,
|
||||||
|
}));
|
||||||
|
setUploadedImages([...uploadedImages, ...mapped]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const importedEntries = [];
|
||||||
|
if (epubFiles.length) {
|
||||||
|
for (const file of epubFiles) {
|
||||||
|
try {
|
||||||
|
// eslint-disable-next-line no-await-in-loop
|
||||||
|
const parsed = await extractTextFromEpub(file);
|
||||||
|
importedEntries.push({
|
||||||
|
id: crypto.randomUUID(),
|
||||||
|
filename: file.name,
|
||||||
|
size: file.size,
|
||||||
|
text: parsed.text,
|
||||||
|
metadata: parsed.metadata,
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
setError(error.message || `${file.name} okunamadı.`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
setEpubImports(importedEntries);
|
||||||
|
clearTranslation();
|
||||||
|
const combinedText = importedEntries.map((entry) => entry.text).filter(Boolean).join('\n\n');
|
||||||
|
if (combinedText) {
|
||||||
|
setOcrText(combinedText);
|
||||||
|
}
|
||||||
|
if (!preservedMetadata && importedEntries[0]?.metadata) {
|
||||||
|
const meta = importedEntries[0].metadata;
|
||||||
|
setBookMetadata({
|
||||||
|
id: `epub-${crypto.randomUUID()}`,
|
||||||
|
title: meta.title || bookTitle || 'İsimsiz EPUB',
|
||||||
|
subtitle: '',
|
||||||
|
authors: meta.authors || [],
|
||||||
|
publisher: meta.publisher || '',
|
||||||
|
publishedDate: meta.publishedDate || '',
|
||||||
|
description: meta.description || '',
|
||||||
|
pageCount: null,
|
||||||
|
categories: meta.categories || [],
|
||||||
|
averageRating: null,
|
||||||
|
ratingsCount: null,
|
||||||
|
language: meta.language || '',
|
||||||
|
infoLink: '',
|
||||||
|
identifiers: meta.identifiers || [],
|
||||||
|
thumbnail: null,
|
||||||
|
});
|
||||||
|
if (!preservedTitle?.trim() && meta.title) {
|
||||||
|
skipSearchRef.current = true;
|
||||||
|
setBookTitle(meta.title);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
setEpubImports([]);
|
||||||
|
}
|
||||||
|
setEpubProcessing(false);
|
||||||
},
|
},
|
||||||
[
|
[
|
||||||
bookMetadata,
|
bookMetadata,
|
||||||
bookTitle,
|
bookTitle,
|
||||||
|
clearTranslation,
|
||||||
resetFromStep,
|
resetFromStep,
|
||||||
setBookMetadata,
|
setBookMetadata,
|
||||||
setBookTitle,
|
setBookTitle,
|
||||||
|
setEpubImports,
|
||||||
|
setError,
|
||||||
|
setOcrText,
|
||||||
setUploadedImages,
|
setUploadedImages,
|
||||||
uploadedImages,
|
uploadedImages,
|
||||||
],
|
],
|
||||||
@@ -175,6 +254,11 @@ const UploadStep = () => {
|
|||||||
accept: {
|
accept: {
|
||||||
'image/png': ['.png'],
|
'image/png': ['.png'],
|
||||||
'image/jpeg': ['.jpg', '.jpeg'],
|
'image/jpeg': ['.jpg', '.jpeg'],
|
||||||
|
'image/webp': ['.webp'],
|
||||||
|
'application/epub+zip': ['.epub'],
|
||||||
|
'application/zip': ['.epub'],
|
||||||
|
'application/x-zip-compressed': ['.epub'],
|
||||||
|
'application/octet-stream': ['.epub'],
|
||||||
},
|
},
|
||||||
multiple: true,
|
multiple: true,
|
||||||
});
|
});
|
||||||
@@ -218,6 +302,11 @@ const UploadStep = () => {
|
|||||||
return { authorsLine, details };
|
return { authorsLine, details };
|
||||||
}, [bookMetadata]);
|
}, [bookMetadata]);
|
||||||
|
|
||||||
|
const hasImages = uploadedImages.length > 0;
|
||||||
|
const hasEpubImports = epubImports.length > 0;
|
||||||
|
const canProceed = hasImages || hasEpubImports;
|
||||||
|
const nextPath = hasImages ? '/crop' : '/ocr';
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Stack spacing={4}>
|
<Stack spacing={4}>
|
||||||
{bookMetadata && (
|
{bookMetadata && (
|
||||||
@@ -377,10 +466,10 @@ const UploadStep = () => {
|
|||||||
<Box {...getRootProps()} sx={dropzoneStyle}>
|
<Box {...getRootProps()} sx={dropzoneStyle}>
|
||||||
<input {...getInputProps()} />
|
<input {...getInputProps()} />
|
||||||
<Typography variant="h5" gutterBottom>
|
<Typography variant="h5" gutterBottom>
|
||||||
Görselleri sürükleyip bırak veya tıkla
|
Görselleri veya EPUB dosyasını sürükleyip bırak ya da tıkla
|
||||||
</Typography>
|
</Typography>
|
||||||
<Typography color="text.secondary" gutterBottom>
|
<Typography color="text.secondary" gutterBottom>
|
||||||
.png, .jpg, .jpeg formatlarında çoklu dosya yükleyebilirsin.
|
.png, .jpg, .jpeg formatlarında çoklu görsel ya da .epub dosyaları yükleyebilirsin.
|
||||||
</Typography>
|
</Typography>
|
||||||
<Button variant="contained" color="primary">
|
<Button variant="contained" color="primary">
|
||||||
Dosya seç
|
Dosya seç
|
||||||
@@ -390,8 +479,51 @@ const UploadStep = () => {
|
|||||||
Bırak ve yükleyelim!
|
Bırak ve yükleyelim!
|
||||||
</Typography>
|
</Typography>
|
||||||
)}
|
)}
|
||||||
|
{epubProcessing && (
|
||||||
|
<Typography mt={2} color="text.secondary">
|
||||||
|
EPUB içeriği ayrıştırılıyor...
|
||||||
|
</Typography>
|
||||||
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
|
{epubImports.length > 0 && (
|
||||||
|
<Box>
|
||||||
|
<Typography variant="h6" gutterBottom>
|
||||||
|
Yüklenen EPUB dosyaları ({epubImports.length})
|
||||||
|
</Typography>
|
||||||
|
<Stack spacing={1.5}>
|
||||||
|
{epubImports.map((item) => (
|
||||||
|
<Paper key={item.id} variant="outlined" sx={{ p: 2, borderRadius: 2 }}>
|
||||||
|
<Typography variant="subtitle1" sx={{ fontWeight: 600 }}>
|
||||||
|
{item.metadata?.title || item.filename}
|
||||||
|
</Typography>
|
||||||
|
{item.metadata?.authors?.length ? (
|
||||||
|
<Typography variant="body2" color="text.secondary" sx={{ fontStyle: 'italic' }}>
|
||||||
|
{item.metadata.authors.join(', ')}
|
||||||
|
</Typography>
|
||||||
|
) : (
|
||||||
|
<Typography variant="body2" color="text.secondary">
|
||||||
|
Yazar bilgisi bulunamadı
|
||||||
|
</Typography>
|
||||||
|
)}
|
||||||
|
<Typography variant="caption" color="text.secondary" display="block" mt={0.5}>
|
||||||
|
{[
|
||||||
|
item.metadata?.publisher,
|
||||||
|
item.metadata?.language ? item.metadata.language.toUpperCase() : null,
|
||||||
|
item.metadata?.publishedDate,
|
||||||
|
]
|
||||||
|
.filter(Boolean)
|
||||||
|
.join(' • ')}
|
||||||
|
</Typography>
|
||||||
|
<Typography variant="caption" color="text.secondary" display="block">
|
||||||
|
{item.filename} • {(item.size / (1024 * 1024)).toFixed(2)} MB
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
))}
|
||||||
|
</Stack>
|
||||||
|
</Box>
|
||||||
|
)}
|
||||||
|
|
||||||
<Box>
|
<Box>
|
||||||
<Typography variant="h6" gutterBottom>
|
<Typography variant="h6" gutterBottom>
|
||||||
Yüklenen görseller ({uploadedImages.length})
|
Yüklenen görseller ({uploadedImages.length})
|
||||||
@@ -441,10 +573,10 @@ const UploadStep = () => {
|
|||||||
<Button
|
<Button
|
||||||
variant="contained"
|
variant="contained"
|
||||||
color="primary"
|
color="primary"
|
||||||
disabled={!uploadedImages.length}
|
disabled={!canProceed}
|
||||||
onClick={() => navigate('/crop')}
|
onClick={() => navigate(nextPath)}
|
||||||
>
|
>
|
||||||
Devam et
|
{hasImages ? 'Crop adımına geç' : 'OCR adımına geç'}
|
||||||
</Button>
|
</Button>
|
||||||
</Stack>
|
</Stack>
|
||||||
</Stack>
|
</Stack>
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ export const useAppStore = create((set) => ({
|
|||||||
coverCropConfig: createEmptyCropConfig(),
|
coverCropConfig: createEmptyCropConfig(),
|
||||||
croppedCoverImage: null,
|
croppedCoverImage: null,
|
||||||
ocrText: '',
|
ocrText: '',
|
||||||
|
epubImports: [],
|
||||||
bookTitle: '',
|
bookTitle: '',
|
||||||
bookMetadata: null,
|
bookMetadata: null,
|
||||||
translatedText: '',
|
translatedText: '',
|
||||||
@@ -74,6 +75,7 @@ export const useAppStore = create((set) => ({
|
|||||||
return { croppedCoverImage: image };
|
return { croppedCoverImage: image };
|
||||||
}),
|
}),
|
||||||
setOcrText: (text) => set({ ocrText: text }),
|
setOcrText: (text) => set({ ocrText: text }),
|
||||||
|
setEpubImports: (imports) => set({ epubImports: imports }),
|
||||||
setBookTitle: (title) => set({ bookTitle: title }),
|
setBookTitle: (title) => set({ bookTitle: title }),
|
||||||
setBookMetadata: (metadata) => set({ bookMetadata: metadata }),
|
setBookMetadata: (metadata) => set({ bookMetadata: metadata }),
|
||||||
setTranslatedText: (text) => set({ translatedText: text }),
|
setTranslatedText: (text) => set({ translatedText: text }),
|
||||||
@@ -151,6 +153,7 @@ export const useAppStore = create((set) => ({
|
|||||||
draft.coverCropConfig = createEmptyCropConfig();
|
draft.coverCropConfig = createEmptyCropConfig();
|
||||||
draft.croppedCoverImage = null;
|
draft.croppedCoverImage = null;
|
||||||
draft.ocrText = '';
|
draft.ocrText = '';
|
||||||
|
draft.epubImports = [];
|
||||||
draft.bookTitle = '';
|
draft.bookTitle = '';
|
||||||
draft.bookMetadata = null;
|
draft.bookMetadata = null;
|
||||||
draft.translatedText = '';
|
draft.translatedText = '';
|
||||||
|
|||||||
131
src/utils/epubImport.js
Normal file
131
src/utils/epubImport.js
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
import JSZip from 'jszip';
|
||||||
|
|
||||||
|
/**
 * Parses an XML string into a DOM document with the browser's DOMParser.
 *
 * DOMParser does not throw on malformed XML; it reports the failure by
 * placing a <parsererror> element in the returned document. That case is
 * turned into an exception here so that a broken container.xml or OPF file is
 * reported to the caller instead of silently yielding a document with missing
 * nodes (and therefore an "imported" book with no text).
 *
 * @param {string} content - Raw XML markup.
 * @returns {Document} The parsed XML document.
 * @throws {Error} When the markup is not well-formed XML.
 */
const parseXml = (content) => {
  const doc = new DOMParser().parseFromString(content, 'application/xml');
  if (doc.getElementsByTagName('parsererror').length > 0) {
    throw new Error('EPUB XML içeriği ayrıştırılamadı.');
  }
  return doc;
};
|
||||||
|
|
||||||
|
/**
 * Normalises whitespace in text extracted from an EPUB document.
 *
 * - removes carriage returns and turns tabs into spaces
 * - strips trailing whitespace at the end of each line
 * - caps consecutive newlines at two (one blank line between paragraphs)
 * - collapses runs of spaces / non-breaking spaces into a single space
 *
 * @param {string} [value=''] - Raw text.
 * @returns {string} The cleaned, trimmed text.
 */
const cleanText = (value = '') =>
  value
    .replace(/\r/g, '')
    .replace(/\t/g, ' ')
    // Only horizontal whitespace may be consumed here: `\s+\n` would also eat
    // the newlines themselves, flattening every paragraph break to a single
    // "\n" and making the `\n{3,}` rule below unreachable.
    .replace(/[^\S\n]+\n/g, '\n')
    .replace(/\n{3,}/g, '\n\n')
    .replace(/[ \u00A0]{2,}/g, ' ')
    .trim();
|
||||||
|
|
||||||
|
/**
 * Resolves a manifest href against a base directory and returns the
 * corresponding path inside the archive.
 *
 * The href is treated as a relative URL: any `#fragment` / `?query` suffix is
 * dropped, `.` and `..` segments are resolved, and percent-encoded segments
 * (e.g. `chapter%201.xhtml`) are decoded, because archive entries are stored
 * under their decoded names. Normalisation is applied even when the base
 * directory is empty.
 *
 * @param {string} basePath - Directory the href is relative to ('' for the archive root).
 * @param {string} [targetPath=''] - Href to resolve.
 * @returns {string} Slash-separated path without leading or trailing slash.
 */
const resolvePath = (basePath, targetPath = '') => {
  // A stray "%" that is not a valid escape makes decodeURIComponent throw;
  // in that case the segment is used verbatim.
  const decodeSegment = (segment) => {
    try {
      return decodeURIComponent(segment);
    } catch {
      return segment;
    }
  };

  const [pathOnly] = targetPath.split(/[?#]/, 1);
  const resolved = (basePath || '').split('/').filter(Boolean);

  for (const part of pathOnly.split('/')) {
    if (!part || part === '.') continue;
    if (part === '..') {
      resolved.pop();
    } else {
      resolved.push(decodeSegment(part));
    }
  }

  return resolved.join('/');
};
|
||||||
|
|
||||||
|
// Manifest media types whose spine documents are read as text.
const TEXT_MEDIA_TYPES = new Set([
  // (X)HTML-style content documents
  'application/xhtml+xml',
  'application/x-dtbook+xml',
  'text/html',
  'text/x-oeb1-document',
  // generic XML
  'application/xml',
  'text/xml',
]);
|
||||||
|
|
||||||
|
/**
 * Tells whether an href points at an (X)HTML or XML file, judged by its
 * extension (.html, .htm, .xhtml, .xhtm, .xml; case-insensitive).
 *
 * A `#fragment` or `?query` suffix is ignored so that hrefs such as
 * `chapter.xhtml#start` are still recognised.
 *
 * @param {string} [href=''] - Manifest href.
 * @returns {boolean} True when the extension looks like a text document.
 */
const looksLikeTextFile = (href = '') => {
  const [pathOnly] = href.split(/[?#]/, 1);
  return /\.(x?html?|xml)$/i.test(pathOnly);
};
|
||||||
|
|
||||||
|
/**
 * Reads bibliographic fields from an OPF <metadata> element.
 *
 * @param {Element|null|undefined} metadataNode - The metadata element, if any.
 * @returns {object} An empty object when no node is given; otherwise an object
 *   with `title`, `language`, `publisher`, `description`, `publishedDate`
 *   (each the trimmed text of the first matching element, or undefined),
 *   `authors` and `categories` (arrays of non-empty trimmed strings) and
 *   `identifiers` (array of `{ identifier, type }`, where `type` is the
 *   `opf:scheme` attribute or, failing that, the `id` attribute).
 */
const parseMetadata = (metadataNode) => {
  if (!metadataNode) {
    return {};
  }

  const trimmedText = (node) => node.textContent?.trim();
  const selectAll = (selector) => Array.from(metadataNode.querySelectorAll(selector));
  const firstText = (selector) => {
    const match = metadataNode.querySelector(selector);
    return match?.textContent?.trim();
  };

  const creatorNames = selectAll('creator, dc\\:creator').map((node) => trimmedText(node));

  const identifierEntries = selectAll('identifier, dc\\:identifier').map((node) => {
    const type = node.getAttribute('opf:scheme') || node.getAttribute('id');
    return { identifier: trimmedText(node), type };
  });

  const categories = selectAll('subject, dc\\:subject')
    .map((node) => trimmedText(node))
    .filter((subject) => Boolean(subject));

  const title = firstText('title, dc\\:title');
  const language = firstText('language, dc\\:language');
  const publisher = firstText('publisher, dc\\:publisher');
  const description = firstText('description, dc\\:description');
  const publishedDate = firstText('date, dc\\:date');

  return {
    title,
    language,
    publisher,
    description,
    publishedDate,
    authors: creatorNames.filter((name) => Boolean(name)),
    identifiers: identifierEntries.filter((entry) => entry.identifier),
    categories,
  };
};
|
||||||
|
|
||||||
|
// Elements after which a line break is inserted before reading textContent,
// so that adjacent blocks such as <p>a</p><p>b</p> do not run together.
const BLOCK_BREAK_SELECTOR =
  'p, div, br, hr, li, tr, h1, h2, h3, h4, h5, h6, blockquote, pre, section, article';

/**
 * Extracts the readable text of one spine document.
 *
 * The markup is parsed as XHTML first, because the HTML parser ignores the
 * self-closing syntax on non-void elements (a `<title/>` or `<script ... />`
 * would swallow the rest of the document). Markup that is not well-formed XML
 * falls back to the lenient HTML parser.
 *
 * @param {DOMParser} parser - Parser instance to reuse.
 * @param {string} markup - Raw document markup.
 * @returns {string} Unnormalised text content ('' when there is none).
 */
const readSpineDocumentText = (parser, markup) => {
  let doc = parser.parseFromString(markup, 'application/xhtml+xml');
  if (doc.getElementsByTagName('parsererror').length > 0) {
    doc = parser.parseFromString(markup, 'text/html');
  }

  // Script and style sources are not book text.
  doc.querySelectorAll('script, style').forEach((node) => node.remove());
  doc.querySelectorAll(BLOCK_BREAK_SELECTOR).forEach((node) => node.after('\n'));

  // `doc.body` may be null for XML documents without the XHTML namespace.
  const body = doc.body ?? doc.querySelector('body');
  return body?.textContent || doc.documentElement?.textContent || '';
};

/**
 * Reads an EPUB archive and returns its text (spine order) and metadata.
 *
 * Steps: open the archive with JSZip, locate the package document through
 * META-INF/container.xml, parse its metadata and manifest, then read every
 * spine item that looks like a text document and join the cleaned text of the
 * items with blank lines. Spine items that are missing from the archive or are
 * not text documents are skipped.
 *
 * @param {File|Blob} file - EPUB file; `name` and `size` are copied into the metadata.
 * @returns {Promise<{text: string, metadata: object}>} Combined text and the
 *   parsed metadata extended with `filename` and `fileSize`.
 * @throws {Error} When container.xml, the rootfile path or the package
 *   document cannot be found in the archive.
 */
export const extractTextFromEpub = async (file) => {
  const zip = await JSZip.loadAsync(file);

  const containerEntry = zip.file('META-INF/container.xml');
  if (!containerEntry) {
    throw new Error('EPUB container bilgileri bulunamadı.');
  }
  const containerXml = await containerEntry.async('string');
  const containerDoc = parseXml(containerXml);
  const rootPath = containerDoc.querySelector('rootfile')?.getAttribute('full-path');
  if (!rootPath) {
    throw new Error('EPUB manifest dosyası bulunamadı.');
  }

  const opfEntry = zip.file(rootPath);
  if (!opfEntry) {
    throw new Error('EPUB manifesti okunamadı.');
  }
  const opfXml = await opfEntry.async('string');
  const opfDoc = parseXml(opfXml);
  const metadata = parseMetadata(opfDoc.querySelector('metadata'));

  // A Map (not a plain object) because the ids come from the uploaded file:
  // ids such as "__proto__" or "constructor" must behave like ordinary keys.
  const manifest = new Map();
  opfDoc.querySelectorAll('manifest > item').forEach((item) => {
    manifest.set(item.getAttribute('id'), {
      href: item.getAttribute('href'),
      mediaType: item.getAttribute('media-type'),
    });
  });

  // Manifest hrefs are relative to the directory of the package document.
  const baseDir = rootPath.includes('/') ? rootPath.split('/').slice(0, -1).join('/') : '';

  const spineRefs = Array.from(opfDoc.querySelectorAll('spine > itemref'))
    .map((itemref) => manifest.get(itemref.getAttribute('idref')))
    .filter(Boolean);

  const parser = new DOMParser();
  const collectedTexts = [];
  for (const ref of spineRefs) {
    if (!ref.href) continue;
    const shouldRead =
      (ref.mediaType && TEXT_MEDIA_TYPES.has(ref.mediaType)) || looksLikeTextFile(ref.href);
    if (!shouldRead) continue;

    const entry = zip.file(resolvePath(baseDir, ref.href));
    if (!entry) continue;

    // Items are read one after another to keep the spine order.
    // eslint-disable-next-line no-await-in-loop
    const markup = await entry.async('string');
    const cleaned = cleanText(readSpineDocumentText(parser, markup));
    if (cleaned) {
      collectedTexts.push(cleaned);
    }
  }

  return {
    text: collectedTexts.join('\n\n').trim(),
    metadata: {
      ...metadata,
      filename: file.name,
      fileSize: file.size,
    },
  };
};
|
||||||
|
|
||||||
Reference in New Issue
Block a user