unstructured@0.5.9

A library that prepares raw documents for downstream ML tasks.

2023-04-03 Python <3.14,>=3.11 sdist
安装 (0.5.9)
pip install unstructured==0.5.9
poetry add unstructured==0.5.9
pipenv install unstructured==0.5.9
conda install unstructured=0.5.9
依赖 (115)
依赖
beautifulsoup4<5.0.0,>=4.14.3
charset-normalizer<4.0.0,>=3.4.4
emoji<3.0.0,>=2.15.0
filelock<4.0.0,>=3.12.0
filetype<2.0.0,>=1.2.0
html5lib<2.0.0,>=1.1
installer<1.0.0,>=0.7.0
langdetect<2.0.0,>=1.0.9
lxml<7.0.0,>=5.0.0
numba<1.0.0,>=0.60.0
numpy<3.0.0,>=1.26.0
psutil<8.0.0,>=7.2.2
python-iso639<2027.0.0,>=2026.1.31
python-magic<1.0.0,>=0.4.27
python-oxmsg<1.0.0,>=0.0.2
rapidfuzz<4.0.0,>=3.14.3
regex<2027.0.0,>=2024.0.0
requests<3.0.0,>=2.32.5
spacy<4.0.0,>=3.7.0
tqdm<5.0.0,>=4.67.3
typing-extensions<5.0.0,>=4.15.0
unstructured-client<1.0.0,>=0.25.9
wrapt<3.0.0,>=2.1.1
google-cloud-vision<4.0.0,>=3.12.1; extra == "all-docs"
markdown<4.0.0,>=3.10.1; extra == "all-docs"
msoffcrypto-tool<7.0.0,>=6.0.0; extra == "all-docs"
networkx<4.0.0,>=3.2.0; extra == "all-docs"
openai-whisper<20270000,>=20231117; extra == "all-docs"
openpyxl<4.0.0,>=3.1.5; extra == "all-docs"
pandas<3.0.0,>=2.0.0; extra == "all-docs"
pdf2image<2.0.0,>=1.17.0; extra == "all-docs"
pdfminer-six<20270000,>=20251230; extra == "all-docs"
pi-heif<2.0.0,>=1.2.0; extra == "all-docs"
pikepdf<11.0.0,>=10.3.0; extra == "all-docs"
pypandoc-binary<2.0.0,>=1.16.2; platform_system != "Windows" and extra == "all-docs"
pypandoc-binary<2.0.0,>=1.16.2; (platform_system == "Windows" and python_version < "3.13") and extra == "all-docs"
pypdf<7.0.0,>=6.6.2; extra == "all-docs"
python-docx<2.0.0,>=1.2.0; extra == "all-docs"
python-pptx<2.0.0,>=1.0.2; extra == "all-docs"
unstructured-inference<2.0.0,>=1.6.12; platform_system != "Windows" and extra == "all-docs"
unstructured-inference<2.0.0,>=1.6.12; (platform_system == "Windows" and python_version >= "3.12" and python_version < "3.13") and extra == "all-docs"
unstructured-pytesseract<1.0.0,>=0.3.15; extra == "all-docs"
xlrd<3.0.0,>=2.0.1; extra == "all-docs"
openai-whisper<20270000,>=20231117; extra == "audio"
tiktoken<1.0.0,>=0.12.0; extra == "chunking-tokens"
pandas<3.0.0,>=2.0.0; extra == "csv"
python-docx<2.0.0,>=1.2.0; extra == "doc"
python-docx<2.0.0,>=1.2.0; extra == "docx"
pypandoc-binary<2.0.0,>=1.16.2; platform_system != "Windows" and extra == "epub"
pypandoc-binary<2.0.0,>=1.16.2; (platform_system == "Windows" and python_version < "3.13") and extra == "epub"
sentencepiece<1.0.0,>=0.2.0; extra == "huggingface"
torch<3.0.0,>=2.10.0; platform_system != "Windows" and extra == "huggingface"
torch<3.0.0,>=2.10.0; (platform_system == "Windows" and python_version < "3.13") and extra == "huggingface"
transformers<6.0.0,>=5.2.0; extra == "huggingface"
google-cloud-vision<4.0.0,>=3.12.1; extra == "image"
pdf2image<2.0.0,>=1.17.0; extra == "image"
pdfminer-six<20270000,>=20251230; extra == "image"
pi-heif<2.0.0,>=1.2.0; extra == "image"
pikepdf<11.0.0,>=10.3.0; extra == "image"
pypdf<7.0.0,>=6.6.2; extra == "image"
unstructured-inference<2.0.0,>=1.6.12; platform_system != "Windows" and extra == "image"
unstructured-inference<2.0.0,>=1.6.12; (platform_system == "Windows" and python_version >= "3.12" and python_version < "3.13") and extra == "image"
unstructured-pytesseract<1.0.0,>=0.3.15; extra == "image"
unstructured-ingest[airtable,astradb,azure,azure-ai-search,bedrock,biomed,box,chroma,confluence,couchbase,databricks-volumes,delta-table,discord,dropbox,elasticsearch,gcs,github,gitlab,google-drive,hubspot,huggingface,jira,kafka,kdbai,milvus,mongodb,notion,octoai,onedrive,openai,opensearch,outlook,pinecone,postgres,qdrant,reddit,remote,s3,salesforce,sftp,sharepoint,singlestore,slack,vectara,vertexai,voyageai,weaviate,wikipedia]<2.0.0,>=1.4.0; platform_system != "Windows" and extra == "ingest"
unstructured-ingest[airtable,astradb,azure,azure-ai-search,bedrock,biomed,box,chroma,confluence,couchbase,databricks-volumes,delta-table,discord,dropbox,elasticsearch,gcs,github,gitlab,google-drive,hubspot,huggingface,jira,kafka,kdbai,milvus,mongodb,notion,octoai,onedrive,openai,opensearch,outlook,pinecone,postgres,qdrant,reddit,remote,s3,salesforce,sftp,sharepoint,singlestore,slack,vectara,vertexai,voyageai,weaviate,wikipedia]<2.0.0,>=1.4.0; (platform_system == "Windows" and python_version < "3.13") and extra == "ingest"
google-cloud-vision<4.0.0,>=3.12.1; extra == "local-inference"
markdown<4.0.0,>=3.10.1; extra == "local-inference"
msoffcrypto-tool<7.0.0,>=6.0.0; extra == "local-inference"
networkx<4.0.0,>=3.2.0; extra == "local-inference"
openai-whisper<20270000,>=20231117; extra == "local-inference"
openpyxl<4.0.0,>=3.1.5; extra == "local-inference"
pandas<3.0.0,>=2.0.0; extra == "local-inference"
pdf2image<2.0.0,>=1.17.0; extra == "local-inference"
pdfminer-six<20270000,>=20251230; extra == "local-inference"
pi-heif<2.0.0,>=1.2.0; extra == "local-inference"
pikepdf<11.0.0,>=10.3.0; extra == "local-inference"
pypandoc-binary<2.0.0,>=1.16.2; platform_system != "Windows" and extra == "local-inference"
pypandoc-binary<2.0.0,>=1.16.2; (platform_system == "Windows" and python_version < "3.13") and extra == "local-inference"
pypdf<7.0.0,>=6.6.2; extra == "local-inference"
python-docx<2.0.0,>=1.2.0; extra == "local-inference"
python-pptx<2.0.0,>=1.0.2; extra == "local-inference"
unstructured-inference<2.0.0,>=1.6.12; platform_system != "Windows" and extra == "local-inference"
unstructured-inference<2.0.0,>=1.6.12; (platform_system == "Windows" and python_version >= "3.12" and python_version < "3.13") and extra == "local-inference"
unstructured-pytesseract<1.0.0,>=0.3.15; extra == "local-inference"
xlrd<3.0.0,>=2.0.1; extra == "local-inference"
markdown<4.0.0,>=3.10.1; extra == "md"
pypandoc-binary<2.0.0,>=1.16.2; platform_system != "Windows" and extra == "odt"
pypandoc-binary<2.0.0,>=1.16.2; (platform_system == "Windows" and python_version < "3.13") and extra == "odt"
python-docx<2.0.0,>=1.2.0; extra == "odt"
pypandoc-binary<2.0.0,>=1.16.2; platform_system != "Windows" and extra == "org"
pypandoc-binary<2.0.0,>=1.16.2; (platform_system == "Windows" and python_version < "3.13") and extra == "org"
paddlepaddle<4.0.0,>=3.3.0; (platform_machine != "aarch64" and platform_system != "Windows") and extra == "paddleocr"
paddlepaddle<4.0.0,>=3.3.0; (platform_system == "Windows" and python_version < "3.13") and extra == "paddleocr"
unstructured-paddleocr==2.10.0; extra == "paddleocr"
google-cloud-vision<4.0.0,>=3.12.1; extra == "pdf"
pdf2image<2.0.0,>=1.17.0; extra == "pdf"
pdfminer-six<20270000,>=20251230; extra == "pdf"
pi-heif<2.0.0,>=1.2.0; extra == "pdf"
pikepdf<11.0.0,>=10.3.0; extra == "pdf"
pypdf<7.0.0,>=6.6.2; extra == "pdf"
unstructured-inference<2.0.0,>=1.6.12; platform_system != "Windows" and extra == "pdf"
unstructured-inference<2.0.0,>=1.6.12; (platform_system == "Windows" and python_version >= "3.12" and python_version < "3.13") and extra == "pdf"
unstructured-pytesseract<1.0.0,>=0.3.15; extra == "pdf"
python-pptx<2.0.0,>=1.0.2; extra == "ppt"
python-pptx<2.0.0,>=1.0.2; extra == "pptx"
pypandoc-binary<2.0.0,>=1.16.2; platform_system != "Windows" and extra == "rst"
pypandoc-binary<2.0.0,>=1.16.2; (platform_system == "Windows" and python_version < "3.13") and extra == "rst"
pypandoc-binary<2.0.0,>=1.16.2; platform_system != "Windows" and extra == "rtf"
pypandoc-binary<2.0.0,>=1.16.2; (platform_system == "Windows" and python_version < "3.13") and extra == "rtf"
pandas<3.0.0,>=2.0.0; extra == "tsv"
msoffcrypto-tool<7.0.0,>=6.0.0; extra == "xlsx"
networkx<4.0.0,>=3.2.0; extra == "xlsx"
openpyxl<4.0.0,>=3.1.5; extra == "xlsx"
pandas<3.0.0,>=2.0.0; extra == "xlsx"
xlrd<3.0.0,>=2.0.1; extra == "xlsx"
下载文件
文件名 | 类型 | Python 版本 | 大小
unstructured-0.5.9.tar.gz | sdist | source | 1298365 bytes
版本列表
0.23.1 2026-06-11
0.23.0 2026-06-10
0.22.32 2026-06-08
0.22.31 2026-05-23
0.22.30 2026-05-22
0.22.29 2026-05-18
0.22.28 2026-05-13
0.22.27 2026-05-05
0.22.26 2026-04-29
0.22.23 2026-04-24
0.22.22 2026-04-20
0.22.21 2026-04-14
0.22.20 2026-04-14
0.22.18 2026-04-08
0.22.16 2026-04-03
0.22.12 2026-04-02
0.22.10 2026-03-31
0.22.6 2026-03-26
0.21.5 2026-02-24
0.21.2 2026-02-23
0.21.1 2026-02-22
0.21.0 2026-02-22
0.20.8 2026-02-20
0.20.6 2026-02-19
0.20.2 2026-02-13
0.18.32 2026-02-10
0.18.31 2026-01-27
0.18.27 2026-01-09
0.18.26 2026-01-05
0.18.24 2025-12-30
0.18.21 2025-11-24
0.18.20 2025-11-15
0.18.18 2025-11-07
0.18.15 2025-09-17
0.18.14 2025-08-26
0.18.13 2025-08-14
0.18.11 2025-07-23
0.18.9 2025-07-16
0.18.7 2025-07-15
0.18.6 2025-07-15
0.18.5 2025-07-11
0.18.3 2025-07-05
0.18.2 2025-07-01
0.18.1 2025-06-24
0.17.2 2025-03-20
0.17.0 2025-03-12
0.16.25 2025-03-07
0.16.24 2025-03-07
0.16.23 2025-02-20
0.16.22 2025-02-20
0.16.21 2025-02-17
0.16.20 2025-02-06
0.16.19 2025-02-05
0.16.17 2025-01-29
0.16.16 2025-01-27
0.16.15 2025-01-23
0.16.14 2025-01-20
0.16.13 2025-01-13
0.16.12 2025-01-05
0.16.11 2024-12-10
0.16.10 2024-12-07
0.16.9 2024-12-02
0.16.8 2024-11-26
0.16.7 2024-11-26
0.16.6 2024-11-22
0.16.5 2024-11-07
0.16.4 2024-10-31
0.16.3 2024-10-25
0.16.2 2024-10-24
0.16.1 2024-10-23
0.16.0 2024-10-17
0.15.14 2024-10-10
0.15.13 2024-09-20
0.15.12 2024-09-13
0.15.10 2024-09-10
0.15.9 2024-08-30
0.15.8 2024-08-27
0.15.7 2024-08-20
0.15.6 2024-08-20
0.15.5 2024-08-16
0.15.3 2024-08-14
0.15.1 2024-08-05
0.15.0 2024-07-19
0.14.10 2024-07-09
0.14.9 2024-06-27
0.14.8 2024-06-24
0.14.7 2024-06-20
0.14.6 2024-06-14
0.14.5 2024-06-07
0.14.4 2024-06-03
0.14.3 2024-05-29
0.14.2 2024-05-22
0.14.2.dev1 2024-05-21
0.14.0 2024-05-17
0.13.7 2024-05-08
0.13.6 2024-04-30
0.13.5 2024-04-29
0.13.4 2024-04-26
0.13.3 2024-04-21
0.13.2 2024-04-05
0.13.1 2024-04-04
0.13.0 2024-04-01
0.12.6 2024-03-08
0.12.5 2024-02-28
0.12.4 2024-02-08
0.12.3 2024-01-29
0.12.2 2024-01-21
0.12.0 2024-01-10
0.11.8 2024-01-03
0.11.7 2024-01-03
0.11.6 2023-12-20
0.11.5 2023-12-17
0.11.4 2023-12-15
0.11.2 2023-11-30
0.11.1 2023-11-29
0.11.0 2023-11-20
0.10.30 2023-11-10
0.10.29 2023-11-07
0.10.28 2023-10-31
0.10.27 2023-10-26
0.10.26 2023-10-25
0.10.25 2023-10-21
0.10.24 2023-10-17
0.10.23 2023-10-16
0.10.22 2023-10-13
0.10.21 2023-10-11
0.10.20 2023-10-11
0.10.19 2023-10-05
0.10.19.dev18 2023-09-30
0.10.18 2023-09-29
0.10.16 2023-09-20
0.10.15 2023-09-16
0.10.14 2023-09-11
0.10.13 2023-09-11
0.10.12 2023-09-04
0.10.11 2023-09-01
0.10.10 2023-08-31
0.10.9 2023-08-30
0.10.8 2023-08-28
0.10.7 2023-08-27
0.10.6 2023-08-26
0.10.5 2023-08-22
0.10.4 2023-08-18
0.10.2 2023-08-17
0.10.1 2023-08-17
0.10.0 2023-08-16
0.9.3 2023-08-15
0.9.2 2023-08-11
0.9.1 2023-08-09
0.9.0 2023-08-01
0.8.8 2023-08-01
0.8.7 2023-07-28
0.8.6 2023-07-28
0.8.5 2023-07-27
0.8.4 2023-07-26
0.8.3 2023-07-26
0.8.1 2023-07-11
0.8.0 2023-07-07
0.7.12 2023-07-01
0.7.11 2023-06-30
0.7.10 2023-06-28
0.7.9 2023-06-26
0.7.8 2023-06-23
0.7.7 2023-06-20
0.7.6 2023-06-16
0.7.5 2023-06-14
0.7.4 2023-06-12
0.7.3 2023-06-09
0.7.2 2023-06-07
0.7.1 2023-06-01
0.7.0 2023-05-31
0.6.11 2023-05-30
0.6.10 2023-05-26
0.6.9 2023-05-24
0.6.8 2023-05-19
0.6.7 2023-05-19
0.6.6 2023-05-12
0.6.5 2023-05-10
0.6.4 2023-05-08
0.6.3 2023-05-04
0.6.2 2023-04-26
0.6.1 2023-04-21
0.6.0 2023-04-21
0.5.13 2023-04-19
0.5.12 2023-04-12
0.5.11 2023-04-05
0.5.10 2023-04-05
0.5.9 2023-04-03
0.5.8 2023-03-30
0.5.7 2023-03-25
0.5.6 2023-03-21
0.5.4 2023-03-14
0.5.3 2023-03-09
0.5.2 2023-03-02
0.5.1 2023-03-01
0.5.0 2023-02-28
0.4.16 2023-02-28
0.4.15 2023-02-23
0.4.14 2023-02-23
0.4.13 2023-02-23
0.4.12 2023-02-23
0.4.11 2023-02-17
0.4.10 2023-02-16
0.4.9 2023-02-15
0.4.8 2023-02-10
0.4.7 2023-02-10
0.4.6 2023-02-03
0.4.4 2023-01-25
0.4.3 2023-01-18
0.4.2 2023-01-17
0.4.1 2023-01-13
0.4.0 2023-01-11
0.3.5 2023-01-05
0.3.4 2022-12-21
0.3.3 2022-12-20
0.3.2 2022-12-15
0.3.1 2022-12-14
0.3.0 2022-12-14
0.2.6.dev1 2022-11-23
0.2.5 2022-11-11
0.2.4 2022-11-11
0.2.3 2022-11-10
0.2.2 2022-11-08
0.2.1 2022-10-21
0.2.0 2022-09-26
0.0.1.dev0 2022-09-06