🧩 API Reference

contextpack_md.api

convert_pdf_to_markdown(pdf_path)

Converts a PDF file to Markdown using marker-pdf.

Source code in src/contextpack_md/api.py
def convert_pdf_to_markdown(pdf_path: Path) -> Optional[str]:
    """
    Render a PDF as Markdown text with marker-pdf.

    The marker modules are imported inside the function, so a missing
    marker-pdf installation is reported at call time instead of breaking
    the import of this module.

    Args:
        pdf_path: Location of the PDF; handed to the converter as a string.

    Returns:
        The text produced by ``text_from_rendered``, or ``None`` when
        marker-pdf cannot be imported or the conversion raises. In both
        failure cases a message is printed.
    """
    try:
        from marker.converters.pdf import PdfConverter
        from marker.models import create_model_dict
        from marker.output import text_from_rendered

        # Build the converter and run it on the path in one go.
        document = PdfConverter(artifact_dict=create_model_dict())(str(pdf_path))
        # Only the first element of the 3-tuple is needed.
        markdown, _unused_a, _unused_b = text_from_rendered(document)
        return markdown
    except ImportError:
        install_hint = "Error: marker-pdf is not installed. Please install it with 'pip install \"contextpack-md[pdf]\"'"
        print(install_hint)
        return None
    except Exception as err:
        # Best effort: report any conversion failure and signal it via None.
        print(f"Error converting PDF: {err}")
        return None

download_pdf(url, save_path)

Downloads a PDF file from a URL.

Source code in src/contextpack_md/api.py
def download_pdf(url: str, save_path: Path) -> bool:
    """
    Fetch a PDF over HTTP and store it on disk.

    Args:
        url: Address to request; redirects are followed.
        save_path: Destination file, opened in binary write mode.

    Returns:
        ``True`` once the response body has been written, ``False`` if any
        step raised (the error is printed).
    """
    try:
        with httpx.Client(follow_redirects=True) as http:
            reply = http.get(url)
            # Turn HTTP error statuses into exceptions before touching the disk.
            reply.raise_for_status()
            with save_path.open("wb") as sink:
                sink.write(reply.content)
    except Exception as err:
        print(f"Error downloading PDF: {err}")
        return False
    return True

get_url_context(url)

Fetches and scrapes a single URL to return clean Markdown.

Source code in src/contextpack_md/api.py
def get_url_context(url: str) -> Optional[str]:
    """
    Return the scraped content for one URL.

    Thin public wrapper: the work is delegated to ``fetch_and_scrape``,
    called with the URL only.

    Args:
        url: Address of the page to fetch.

    Returns:
        Whatever ``fetch_and_scrape`` returns for ``url``.
    """
    scraped = fetch_and_scrape(url)
    return scraped

options: show_root_heading: true

contextpack_md.scraper

extract_content(html, url)

Extracts readable markdown from HTML.

Source code in src/contextpack_md/scraper.py
def extract_content(html: str, url: str) -> Optional[str]:
    """
    Convert an HTML document to Markdown through ``trafilatura.extract``.

    Args:
        html: HTML markup to extract from.
        url: Forwarded to trafilatura as the document's ``url``.

    Returns:
        The value returned by ``trafilatura.extract`` for these settings.
    """
    # Markdown output; links are kept, images are dropped.
    extraction_options = {
        "url": url,
        "output_format": "markdown",
        "include_links": True,
        "include_images": False,
    }
    markdown = trafilatura.extract(html, **extraction_options)
    return markdown

fetch_and_scrape(url, timeout=10)

Downloads and extracts content from a URL.

Source code in src/contextpack_md/scraper.py
def fetch_and_scrape(url: str, timeout: int = 10) -> Optional[str]:
    """
    Downloads and extracts content from a URL.

    Args:
        url: Address of the page to download.
        timeout: Download timeout, forwarded to trafilatura through its
            ``DOWNLOAD_TIMEOUT`` setting (previously accepted but ignored).

    Returns:
        The result of ``extract_content`` for the downloaded page, or
        ``None`` when the download yields nothing.
    """
    # Function-scope imports so this fix does not depend on the file header.
    from copy import deepcopy

    from trafilatura.settings import DEFAULT_CONFIG

    # Work on a copy so the library-wide default configuration stays untouched.
    config = deepcopy(DEFAULT_CONFIG)
    # ConfigParser values must be strings.
    config["DEFAULT"]["DOWNLOAD_TIMEOUT"] = str(timeout)
    # NOTE(review): trafilatura may reuse a connection pool created on the
    # first download; confirm that a per-call timeout takes effect on later
    # calls with the installed trafilatura version.
    downloaded = trafilatura.fetch_url(url, config=config)
    if not downloaded:
        return None
    return extract_content(downloaded, url)

options: show_root_heading: true