ai

python-docx 是一个用于创建和更新 Microsoft Word (.docx) 文件的 Python 库。它允许开发者通过编程方式生成复杂的 Word 文档，而无需手动操作 Word 应用程序。

安装 #

pip install python-docx

基本用法 #

1. 创建新文档 #

from docx import Document

# Create a new, empty Document object
doc = Document()

# Add the document title (level 0 is the top-most heading level)
doc.add_heading('文档标题', level=0)

# Add a plain paragraph
doc.add_paragraph('这是一个普通段落。')

# Add a paragraph whose text really is bold. Bold is a run-level property,
# so the text goes into a run and that run is flagged bold. Passing
# style='Heading 2' here would turn the paragraph into a heading instead
# of making an ordinary paragraph bold.
doc.add_paragraph().add_run('这是一个加粗的段落。').bold = True

# Write the document to disk
doc.save('demo.docx')

2. 读取现有文档 #

from docx import Document

# Load an existing document from disk
doc = Document('existing_document.docx')

# Print the text of each paragraph, in document order
for paragraph in doc.paragraphs:
    print(paragraph.text)

# Print the text of every cell, table by table and row by row
all_cells = (
    cell
    for table in doc.tables
    for row in table.rows
    for cell in row.cells
)
for cell in all_cells:
    print(cell.text)

核心功能 #

1. 文本处理 #

# Build one paragraph from several runs, each carrying its own formatting
paragraph = doc.add_paragraph()
paragraph.add_run('普通文本 ')

bold_run = paragraph.add_run('加粗文本')
bold_run.bold = True

italic_run = paragraph.add_run(' 斜体文本')
italic_run.italic = True

# Size, colour and typeface are set through the run's font object
from docx.shared import Pt, RGBColor

run = paragraph.add_run('自定义字体')
font = run.font
font.size = Pt(14)
font.color.rgb = RGBColor(0x42, 0x24, 0xE9)
font.name = 'Arial'

2. 表格操作 #

# Create a 3x3 table
table = doc.add_table(rows=3, cols=3)

# Label every cell with its 1-based row and column position
for i, table_row in enumerate(table.rows):
    for j, cell in enumerate(table_row.cells):
        cell.text = f'行{i+1}列{j+1}'

# Apply a table style by name
table.style = 'Light Shading Accent 1'

# Append one more row and fill in its first cell
row = table.add_row()
first_cell = row.cells[0]
first_cell.text = "新行"

3. 图片插入 #

from docx.shared import Inches

# Insert image.png into the document with a width of 2 inches; no height is
# passed here.
doc.add_picture('image.png', width=Inches(2.0))

4. 页眉页脚 #

from docx.oxml.shared import OxmlElement, qn

# Headers and footers belong to a section; use the first section here
section = doc.sections[0]
header = section.header
footer = section.footer

# This script reuses the header's first paragraph for the header text
header_para = header.paragraphs[0]
header_para.text = "这是页眉"

footer_para = footer.paragraphs[0]
footer_para.text = "页码: "

# python-docx runs have no add_field() method, so the PAGE field is built
# from raw WordprocessingML: a "begin" marker, the field instruction, and an
# "end" marker, all placed inside one run after the label text.
page_run = footer_para.add_run()

field_begin = OxmlElement('w:fldChar')
field_begin.set(qn('w:fldCharType'), 'begin')

field_code = OxmlElement('w:instrText')
field_code.text = 'PAGE'

field_end = OxmlElement('w:fldChar')
field_end.set(qn('w:fldCharType'), 'end')

for element in (field_begin, field_code, field_end):
    page_run._r.append(element)

5. 样式管理 #

# List the name of every style available in the document
styles = doc.styles
for available_style in styles:
    print(available_style.name)

# Define a custom paragraph style
from docx.enum.style import WD_STYLE_TYPE

my_style = styles.add_style('MyStyle', WD_STYLE_TYPE.PARAGRAPH)
my_style.paragraph_format.space_after = Pt(12)
my_font = my_style.font
my_font.size = Pt(12)
my_font.bold = True

# Apply the custom style by name when adding a paragraph
doc.add_paragraph('使用自定义样式', style='MyStyle')

高级功能 #

1. 文档分节 #

# Imports are placed ahead of the loop so they are not re-executed on every
# iteration.
from docx.enum.section import WD_ORIENT
from docx.shared import Inches

# Append a new section to the document
doc.add_section()

# Visit every section in the document
for section in doc.sections:
    print(section.start_type)

    # Switch the section to landscape. The orientation flag does not resize
    # the page by itself, so the width and height are swapped as well when
    # the page is still taller than it is wide. Sections that carry no
    # explicit page size (None) are left alone.
    section.orientation = WD_ORIENT.LANDSCAPE
    page_width, page_height = section.page_width, section.page_height
    if (
        page_width is not None
        and page_height is not None
        and page_width < page_height
    ):
        section.page_width, section.page_height = page_height, page_width

    # Set the left and right page margins
    section.left_margin = Inches(1.5)
    section.right_margin = Inches(1.5)

2. 列表和编号 #

# Bulleted list: one paragraph per item, all using the 'List Bullet' style
for bullet_text in ('第一项', '第二项'):
    doc.add_paragraph(bullet_text, style='List Bullet')

# Numbered list: one paragraph per step, all using the 'List Number' style
for step_text in ('第一步', '第二步'):
    doc.add_paragraph(step_text, style='List Number')

3. 超链接 #

from docx.oxml.shared import qn

def add_hyperlink(paragraph, text, url):
    """Append an external hyperlink to *paragraph* and return its XML element.

    Args:
        paragraph: The python-docx paragraph that receives the link.
        text: The visible link text.
        url: The link target; it is registered as an external relationship
            on the part that owns the paragraph.

    Returns:
        The newly created ``w:hyperlink`` element, already appended to the
        paragraph's underlying ``w:p`` element.
    """
    # Imported here because the surrounding script never binds the name
    # `docx` itself (it only uses `from docx... import ...`), so spelling
    # out `docx.opc...` / `docx.oxml...` would raise NameError.
    from docx.opc.constants import RELATIONSHIP_TYPE
    from docx.oxml.shared import OxmlElement, qn

    # Register the URL as an external relationship and get its id
    part = paragraph.part
    r_id = part.relate_to(url, RELATIONSHIP_TYPE.HYPERLINK, is_external=True)

    # The hyperlink element points at the relationship through r:id
    hyperlink = OxmlElement('w:hyperlink')
    hyperlink.set(qn('r:id'), r_id)

    # A run with an (empty) run-properties element carries the link text
    new_run = OxmlElement('w:r')
    rPr = OxmlElement('w:rPr')
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    # Attach the hyperlink at the end of the paragraph
    paragraph._p.append(hyperlink)

    return hyperlink

# Create an empty paragraph and append a link to it with the helper
paragraph = doc.add_paragraph()
add_hyperlink(paragraph, 'Google', 'http://google.com')

注意事项 #

- python-docx 只能处理 .docx 格式，不能处理旧的 .doc 格式。
- 复杂的 Word 功能（如宏、表单控件等）可能不受支持。
- 修改文档后必须调用 save() 方法，更改才会写入文件。
- 处理大型文档时，可能需要考虑内存使用情况。

总结 #

python-docx 模块提供了强大的功能来创建和操作 Word 文档，适用于自动化报告生成、批量文档处理等场景。通过组合各种功能，可以创建出专业外观的文档，大大提高工作效率。

安装 #

基本用法 #

1. 创建新文档 #

2. 读取现有文档 #

核心功能 #

1. 文本处理 #

2. 表格操作 #

3. 图片插入 #

4. 页眉页脚 #

5. 样式管理 #

高级功能 #

1. 文档分节 #

2. 列表和编号 #

3. 超链接 #

注意事项 #

总结 #

访问验证