Schema Markup Extractor Online

Extract Schema.org structured data from any HTML. Detect JSON-LD, Microdata, and RDFa instantly. Free, offline, client-side - secure.

Extract JSON-LD, Microdata, and RDFa structured data from any HTML page. Runs entirely in your browser — paste or upload HTML, nothing is sent anywhere.

How to Use the Schema Markup Extractor

  1. Grab the HTML. On any web page, right-click → "View Page Source" (or press Ctrl+U / Cmd+U), then select all and copy. The tool cannot fetch URLs directly because browser CORS blocks cross-origin JavaScript requests.
  2. Choose an input method. Use the "Paste HTML" tab for quick pastes, or the "Upload File" tab to load a saved .html / .htm file from your device.
  3. Extract schemas. Click "Extract Schema" or press Ctrl+Enter (Cmd+Enter on Mac). The tool parses JSON-LD ` '); * // => [{ id: 'jsonld-0', method: 'JSON-LD', type: 'Organization', * // schema: { '@type':'Organization', name:'Acme' }, * // jsonString: '{\n "@type": "Organization",\n "name": "Acme"\n}' }] * * @example * SchemaExtractorLogic.extractJSONLD('

    no json-ld here

    '); * // => [] */ extractJSONLD(htmlString) { const schemas = []; const doc = new DOMParser().parseFromString(htmlString, 'text/html'); const scripts = doc.querySelectorAll('script[type="application/ld+json"]'); scripts.forEach((script, idx) => { const jsonText = (script.textContent || '').trim(); if (!jsonText) return; try { const parsed = JSON.parse(jsonText); const schemaType = parsed['@type'] || 'Unknown'; schemas.push({ id: 'jsonld-' + idx, method: 'JSON-LD', type: Array.isArray(schemaType) ? schemaType.join(', ') : schemaType, schema: parsed, jsonString: JSON.stringify(parsed, null, 2) }); } catch (e) { schemas.push({ id: 'jsonld-' + idx, method: 'JSON-LD', type: 'Parse Error', schema: null, jsonString: 'Error: ' + e.message }); } }); return schemas; }, /** * Recursively parse a DOM element that has `itemscope` as a Microdata * item, returning a plain JS object with `@type` and property keys. * * @param {Element} element - A DOM element with `itemscope`. * @returns {Object} * * @example * // Given an element like: * //
    * // Ada * //
    * // returns { '@type': 'Person', name: 'Ada' } */ parseMicrodataItem(element) { const schema = {}; const itemType = element.getAttribute('itemtype'); if (itemType) schema['@type'] = itemType.split('/').pop(); const properties = element.querySelectorAll('[itemprop]'); properties.forEach(prop => { const propName = prop.getAttribute('itemprop'); let propValue; if (prop.hasAttribute('itemscope')) { propValue = this.parseMicrodataItem(prop); } else if (prop.hasAttribute('content')) { propValue = prop.getAttribute('content'); } else if (prop.hasAttribute('href')) { propValue = prop.getAttribute('href'); } else if (prop.hasAttribute('src')) { propValue = prop.getAttribute('src'); } else { propValue = (prop.textContent || '').trim(); } if (schema[propName] !== undefined) { schema[propName] = Array.isArray(schema[propName]) ? [...schema[propName], propValue] : [schema[propName], propValue]; } else { schema[propName] = propValue; } }); return schema; }, /** * Extract every top-level Microdata `itemscope` block from the given HTML. * * @param {string} htmlString * @returns {Array<{id: string, method: string, type: string, schema: Object, jsonString: string}>} * * @example * SchemaExtractorLogic.extractMicrodata( * '
    Dune
    ' * ); * // => [{ method: 'Microdata', type: 'Book', schema: { '@type': 'Book', name: 'Dune' }, ... }] */ extractMicrodata(htmlString) { const schemas = []; const doc = new DOMParser().parseFromString(htmlString, 'text/html'); const items = doc.querySelectorAll('[itemscope]'); items.forEach((item, idx) => { const schema = this.parseMicrodataItem(item); const itemType = item.getAttribute('itemtype') || 'Unknown'; const typeName = itemType.split('/').pop(); schemas.push({ id: 'microdata-' + idx, method: 'Microdata', type: typeName, schema, jsonString: JSON.stringify(schema, null, 2) }); }); return schemas; }, /** * Parse an RDFa element (with `typeof`) and its `property` descendants * into a plain JS object. * * @param {Element} element * @returns {Object} * * @example * //
    Widget
    * // returns { '@type': 'Product', name: 'Widget' } */ parseRDFaItem(element) { const schema = {}; const typeOf = element.getAttribute('typeof'); if (typeOf) schema['@type'] = typeOf; const properties = element.querySelectorAll('[property]'); properties.forEach(prop => { const propName = prop.getAttribute('property'); let propValue; if (prop.hasAttribute('content')) { propValue = prop.getAttribute('content'); } else if (prop.hasAttribute('href')) { propValue = prop.getAttribute('href'); } else if (prop.hasAttribute('src')) { propValue = prop.getAttribute('src'); } else { propValue = (prop.textContent || '').trim(); } if (schema[propName] !== undefined) { schema[propName] = Array.isArray(schema[propName]) ? [...schema[propName], propValue] : [schema[propName], propValue]; } else { schema[propName] = propValue; } }); return schema; }, /** * Extract every RDFa `typeof` block from the given HTML. * * @param {string} htmlString * @returns {Array<{id: string, method: string, type: string, schema: Object, jsonString: string}>} * * @example * SchemaExtractorLogic.extractRDFa( * '
    Ada
    ' * ); * // => [{ method: 'RDFa', type: 'Person', schema: { '@type':'Person', name:'Ada' }, ... }] */ extractRDFa(htmlString) { const schemas = []; const doc = new DOMParser().parseFromString(htmlString, 'text/html'); const items = doc.querySelectorAll('[typeof]'); items.forEach((item, idx) => { const schema = this.parseRDFaItem(item); const typeOf = item.getAttribute('typeof') || 'Unknown'; schemas.push({ id: 'rdfa-' + idx, method: 'RDFa', type: typeOf, schema, jsonString: JSON.stringify(schema, null, 2) }); }); return schemas; }, /** * Run all three extractors on the HTML and combine results. Returns a * `success` flag, an array of schemas, a total count, and a breakdown by * method — or an error object for empty input. * * @param {string} htmlString * @returns {{success: boolean, error: (string|undefined), schemas: (Array|undefined), count: (number|undefined), breakdown: ({jsonLd: number, microdata: number, rdfa: number}|undefined)}} * * @example * const r = SchemaExtractorLogic.extractAllSchemas(''); * r.count; // => 1 * r.breakdown; // => { jsonLd: 1, microdata: 0, rdfa: 0 } * * @example * SchemaExtractorLogic.extractAllSchemas(''); * // => { success: false, error: 'HTML source code is required' } */ extractAllSchemas(htmlString) { if (!htmlString || htmlString.trim() === '') { return { success: false, error: 'HTML source code is required' }; } const jsonLd = this.extractJSONLD(htmlString); const microdata = this.extractMicrodata(htmlString); const rdfa = this.extractRDFa(htmlString); const schemas = [...jsonLd, ...microdata, ...rdfa]; if (schemas.length === 0) { return { success: false, error: 'No schema markup detected in the HTML', schemas: [] }; } return { success: true, schemas, count: schemas.length, breakdown: { jsonLd: jsonLd.length, microdata: microdata.length, rdfa: rdfa.length } }; } }; if (typeof module !== 'undefined' && typeof module.exports !== 'undefined') { module.exports = SchemaExtractorLogic; } const elements = { tabHTML: 
document.getElementById('tab-html'), tabFile: document.getElementById('tab-file'), panelHTML: document.getElementById('panel-html'), panelFile: document.getElementById('panel-file'), htmlInput: document.getElementById('html-input'), fileInput: document.getElementById('file-input'), extractBtn: document.getElementById('dtf-extract-btn'), clearBtn: document.getElementById('dtf-clear-btn'), statusBanner: document.getElementById('dtf-status-banner'), resultsSection: document.getElementById('dtf-results-section'), schemaCount: document.getElementById('schema-count'), schemasContainer: document.getElementById('dtf-schemas-container'), stats: document.getElementById('dtf-stats'), copyAllBtn: document.getElementById('dtf-copy-all-btn'), downloadBtn: document.getElementById('dtf-download-btn'), toast: document.getElementById('dtf-toast') }; let currentSchemas = []; function showToast(message, type) { const variant = type || 'success'; elements.toast.textContent = message; elements.toast.className = 'dtf-toast dtf-toast-' + variant; elements.toast.classList.remove('hidden'); setTimeout(() => elements.toast.classList.add('hidden'), 2600); } function showStatusBanner(msg, type) { elements.statusBanner.textContent = msg; elements.statusBanner.className = 'dtf-status-banner ' + (type || 'success'); elements.statusBanner.classList.remove('hidden'); } function hideStatusBanner() { elements.statusBanner.classList.add('hidden'); } function showResults() { elements.resultsSection.classList.remove('hidden'); } function hideResults() { elements.resultsSection.classList.add('hidden'); } function switchTab(tab) { if (tab === 'html') { elements.tabHTML.classList.add('active'); elements.tabFile.classList.remove('active'); elements.panelHTML.classList.add('active'); elements.panelHTML.classList.remove('hidden'); elements.panelFile.classList.remove('active'); elements.panelFile.classList.add('hidden'); } else { elements.tabFile.classList.add('active'); 
elements.tabHTML.classList.remove('active'); elements.panelFile.classList.add('active'); elements.panelFile.classList.remove('hidden'); elements.panelHTML.classList.remove('active'); elements.panelHTML.classList.add('hidden'); } } function displaySchemas(schemas) { elements.schemasContainer.innerHTML = ''; schemas.forEach((schema, idx) => { const card = document.createElement('div'); card.className = 'dtf-schema-card'; const header = document.createElement('div'); header.className = 'dtf-schema-header'; const typeLabel = document.createElement('div'); typeLabel.className = 'dtf-schema-type'; typeLabel.textContent = schema.type; const methodBadge = document.createElement('span'); methodBadge.className = 'dtf-schema-method'; methodBadge.textContent = schema.method; header.appendChild(typeLabel); header.appendChild(methodBadge); const content = document.createElement('pre'); content.className = 'dtf-schema-content'; content.textContent = schema.jsonString; const actions = document.createElement('div'); actions.className = 'dtf-schema-actions'; const copyBtn = document.createElement('button'); copyBtn.className = 'dtf-btn-copy-single'; copyBtn.textContent = '📋 Copy this schema'; copyBtn.addEventListener('click', () => copySingleSchema(idx)); actions.appendChild(copyBtn); card.appendChild(header); card.appendChild(content); card.appendChild(actions); elements.schemasContainer.appendChild(card); }); } function updateStats(result) { if (!result || !result.breakdown) { elements.stats.textContent = ''; return; } const b = result.breakdown; elements.stats.textContent = `${result.count} total · ${b.jsonLd} JSON-LD · ${b.microdata} Microdata · ${b.rdfa} RDFa`; } function handleExtract() { const htmlSource = elements.htmlInput.value; hideStatusBanner(); const result = SchemaExtractorLogic.extractAllSchemas(htmlSource); if (!result.success) { showStatusBanner(result.error, 'error'); showToast(result.error, 'error'); hideResults(); updateStats(null); currentSchemas = []; return; 
} currentSchemas = result.schemas; displaySchemas(result.schemas); showResults(); elements.schemaCount.textContent = result.count; updateStats(result); showStatusBanner(`Found ${result.count} schema${result.count === 1 ? '' : 's'}.`, 'success'); showToast(`Found ${result.count} schema${result.count === 1 ? '' : 's'}.`, 'success'); } function handleFileUpload(e) { const file = e.target.files[0]; if (!file) return; const reader = new FileReader(); reader.onload = (ev) => { elements.htmlInput.value = ev.target.result; switchTab('html'); showToast(`Loaded ${file.name}`, 'info'); }; reader.onerror = () => showToast('Could not read file.', 'error'); reader.readAsText(file); } function handleClear() { elements.htmlInput.value = ''; elements.fileInput.value = ''; currentSchemas = []; hideResults(); hideStatusBanner(); updateStats(null); switchTab('html'); showToast('Cleared.', 'info'); } function copySingleSchema(idx) { if (!currentSchemas[idx]) return; copyToClipboard(currentSchemas[idx].jsonString, 'Schema copied to clipboard.'); } function handleCopyAll() { if (currentSchemas.length === 0) { showToast('Nothing to copy — extract schemas first.', 'error'); return; } const allText = currentSchemas.map(s => s.jsonString).join('\n\n---\n\n'); copyToClipboard(allText, `All ${currentSchemas.length} schemas copied.`); } async function copyToClipboard(text, successMsg) { try { await navigator.clipboard.writeText(text); showToast(successMsg, 'success'); } catch { fallbackCopy(text, successMsg); } } function fallbackCopy(text, successMsg) { const ta = document.createElement('textarea'); ta.value = text; ta.setAttribute('readonly', ''); ta.style.position = 'fixed'; ta.style.opacity = '0'; document.body.appendChild(ta); ta.select(); let ok = false; try { ok = document.execCommand('copy'); } catch { ok = false; } document.body.removeChild(ta); showToast(ok ? successMsg : 'Copy failed — please select and copy manually.', ok ? 
'success' : 'error'); } function handleDownload() { if (currentSchemas.length === 0) { showToast('Nothing to download — extract schemas first.', 'error'); return; } const allSchemas = currentSchemas.map(s => s.schema).filter(s => s !== null); const blob = new Blob([JSON.stringify(allSchemas, null, 2)], { type: 'application/json' }); const url = URL.createObjectURL(blob); const a = document.createElement('a'); a.href = url; a.download = 'extracted-schemas.json'; document.body.appendChild(a); a.click(); document.body.removeChild(a); URL.revokeObjectURL(url); showToast('Downloaded extracted-schemas.json', 'success'); } function handleKeydown(e) { if ((e.ctrlKey || e.metaKey) && e.key === 'Enter') { e.preventDefault(); handleExtract(); } } elements.tabHTML.addEventListener('click', () => switchTab('html')); elements.tabFile.addEventListener('click', () => switchTab('file')); elements.fileInput.addEventListener('change', handleFileUpload); elements.extractBtn.addEventListener('click', handleExtract); elements.clearBtn.addEventListener('click', handleClear); elements.copyAllBtn.addEventListener('click', handleCopyAll); elements.downloadBtn.addEventListener('click', handleDownload); document.addEventListener('keydown', handleKeydown); } if (document.readyState === 'loading') { document.addEventListener('DOMContentLoaded', init); } else { init(); } })();

How to Use Schema Markup Extractor Online

  1. Grab the HTML. On any web page, right-click → "View Page Source" (or press Ctrl+U / Cmd+U), then select all and copy. The tool cannot fetch URLs directly because browser CORS blocks cross-origin JavaScript requests.
  2. Choose an input method. Use the "Paste HTML" tab for quick pastes, or the "Upload File" tab to load a saved .html / .htm file from your device.
  3. Extract schemas. Click "Extract Schema" or press Ctrl+Enter (Cmd+Enter on Mac). The tool parses JSON-LD `<script type="application/ld+json">` blocks, Microdata `itemscope` elements, and RDFa `typeof` elements in one pass.
  4. Read the results. Each schema appears as a card showing the detected `@type`, a badge for the extraction method (JSON-LD / Microdata / RDFa), and the full JSON pretty-printed. The stats line summarises the counts by method.
  5. Copy individually or all at once. Use the per-card "Copy this schema" button for a single block, or "Copy All" at the bottom to get every schema separated by `---`.
  6. Download as JSON. "Download JSON" saves an array of all schema objects to `extracted-schemas.json` - handy for diffing, archiving, or feeding into testing tools.
  7. Clear and repeat. "Clear All" resets the textarea, the file input, and the results section so you can extract from a different page without reloading.

Frequently Asked Questions

Is my HTML data secure?

Yes. All extraction happens in your browser using the built-in DOMParser. No HTML is uploaded to any server; your data remains 100% private.

What is schema markup?

Schema markup is structured data that helps search engines understand your content. It comes in three flavours: JSON-LD (script blocks), Microdata (HTML attributes), and RDFa (similar attribute syntax). Search engines use it for rich results like review stars, recipe cards, and event cards.

Why can’t I enter a URL directly?

Browser security (CORS policy) prevents client-side JavaScript from fetching arbitrary URLs. To keep this tool 100% client-side and privacy-respecting, we ask you to paste the HTML yourself instead of relying on a server-side proxy.

Is this tool free?

Yes, completely free with no sign-up required. Extract unlimited schemas from any HTML source.

Does it work offline?

Yes. Once the page loads, extraction runs locally in your browser – no network calls. Ideal for working on internal or confidential pages.

What happens with malformed JSON-LD?

A broken JSON-LD block is surfaced as a “Parse Error” card showing the specific error message. Other valid schemas on the same page are still extracted normally.

How are nested Microdata items handled?

The Microdata parser walks `itemscope` elements recursively, so a `Product` containing an `Offer` is represented as a nested JSON object with the correct `@type` on each level.

What’s the difference between JSON-LD, Microdata, and RDFa?

JSON-LD is a separate <script> block with pure JSON – easiest to maintain and Google’s preferred format. Microdata uses HTML attributes inline with content (`itemprop="name"`). RDFa uses `typeof` and `property` attributes. All three convey the same schema information.

Does it support multiple schemas on one page?

Yes. Every JSON-LD `<script>` block, every `itemscope` element, and every `typeof` element becomes its own card. The stats line shows the breakdown.

Can I use the downloaded JSON in Google’s Rich Results Test?

Not directly – Google’s test accepts a URL or raw HTML, not a schema array. But the downloaded JSON is useful for diffing against previous extractions, feeding into custom validators, or archiving a page’s structured data over time.