[1346]python ijson用法詳情 - python,json,JSON,數組,Html,CSS,前端開發 XC_周小董博客

文章目錄

安裝
基本概念
基礎用法

1. 基本解析
2. 解析文件中的特定路徑

高級用法

1. 使用事件解析
2. 處理大型 JSON 數組
3. 處理嵌套結構
4. 錯誤處理和性能優化

實際應用場景

1. 處理 API 響應流
2. 日誌文件分析

我來詳細介紹一下 Python ijson 庫的用法，這是一個用於流式解析大型 JSON 文件的工具。

安裝

pip install ijson

基本概念

ijson 的主要優勢在於它不需要將整個 JSON 文件加載到內存中，而是逐個讀取和解析 JSON 元素。

基礎用法

1. 基本解析

import io

import ijson

# Sample JSON document used by both examples below.
json_data = '''
{
    "users": [
        {"id": 1, "name": "Alice", "age": 25},
        {"id": 2, "name": "Bob", "age": 30},
        {"id": 3, "name": "Charlie", "age": 35}
    ],
    "total": 3
}
'''

# Method 1: parse from an in-memory string.
# ijson is a byte-oriented parser, so the text is encoded and wrapped in a
# binary file-like object instead of being passed as a plain str.
items = ijson.items(io.BytesIO(json_data.encode('utf-8')), 'users.item')
for user in items:
    print(f"ID: {user['id']}, Name: {user['name']}, Age: {user['age']}")

# Method 2: parse from a file.
with open('data.json', 'w', encoding='utf-8') as f:
    f.write(json_data)

# Open in binary mode: ijson reads bytes, and handing it a text-mode file
# forces an extra re-encoding step (and is deprecated by the library).
with open('data.json', 'rb') as f:
    users = ijson.items(f, 'users.item')
    for user in users:
        print(f"User: {user}")

2. 解析文件中的特定路徑

import ijson

# Build a small stand-in for a large file named large_data.json.
json_structure = '''
{
    "company": "Tech Corp",
    "departments": [
        {
            "name": "Engineering",
            "employees": [
                {"id": 101, "name": "John", "salary": 50000},
                {"id": 102, "name": "Jane", "salary": 60000}
            ]
        },
        {
            "name": "Marketing", 
            "employees": [
                {"id": 201, "name": "Mike", "salary": 45000},
                {"id": 202, "name": "Sarah", "salary": 55000}
            ]
        }
    ]
}
'''

with open('large_data.json', 'w', encoding='utf-8') as f:
    f.write(json_structure)

# Each extraction below re-opens the file because an ijson iterator consumes
# the stream it is given. Files are opened in binary mode ('rb') because
# ijson parses bytes; text-mode handles are deprecated input.

# Extract a single scalar: the company name.
with open('large_data.json', 'rb') as f:
    company = ijson.items(f, 'company')
    print(f"Company: {next(company)}")

# Extract the name of every department.
with open('large_data.json', 'rb') as f:
    dept_names = ijson.items(f, 'departments.item.name')
    print("Departments:")
    for name in dept_names:
        print(f" - {name}")

# Extract every employee object across all departments.
with open('large_data.json', 'rb') as f:
    employees = ijson.items(f, 'departments.item.employees.item')
    print("\nAll Employees:")
    for emp in employees:
        print(f" - {emp['name']} (ID: {emp['id']})")

高級用法

1. 使用事件解析

import ijson

json_data = '''
{
    "users": [
        {"id": 1, "name": "Alice", "hobbies": ["reading", "swimming"]},
        {"id": 2, "name": "Bob", "hobbies": ["gaming", "coding"]}
    ]
}
'''

with open('events_data.json', 'w', encoding='utf-8') as f:
    f.write(json_data)

# Extra line printed for the four container events; scalar events
# (string, number, map_key, ...) only get the generic trace line.
container_labels = {
    'start_array': '開始數組',
    'end_array': '結束數組',
    'start_map': '開始對象',
    'end_map': '結束對象',
}

print("解析事件流:")
# Binary mode: ijson parses bytes; text-mode handles are deprecated input.
with open('events_data.json', 'rb') as f:
    for prefix, event, value in ijson.parse(f):
        print(f"路徑: {prefix:20} 事件: {event:10} 值: {value}")

        # React to specific event types.
        label = container_labels.get(event)
        if label is not None:
            print(f"{label}: {prefix}")

2. 處理大型 JSON 數組

import ijson
import json

# Generate 1000 synthetic product records and persist them as one JSON
# document, to serve as input for the streaming examples.
large_data = {
    "products": [
        {
            "id": idx + 1,
            "name": f"Product {idx + 1}",
            "price": idx * 10 + 5.99,
            "category": f"Category {idx % 5}",
            "in_stock": idx % 3 == 0,
        }
        for idx in range(1000)
    ]
}

with open('large_products.json', 'w') as out:
    json.dump(large_data, out)

# Stream the products with ijson instead of loading the whole file.
def process_expensive_products(price_threshold=500.0):
    """Print and count the products priced above ``price_threshold``.

    Streams ``large_products.json`` one product at a time, so memory use
    does not grow with the size of the file.

    Args:
        price_threshold: Products whose ``price`` is strictly greater than
            this value are reported.

    Returns:
        The number of products above the threshold.
    """
    count = 0
    # Binary mode: ijson parses bytes; text-mode handles are deprecated input.
    with open('large_products.json', 'rb') as f:
        for product in ijson.items(f, 'products.item'):
            if product['price'] > price_threshold:
                count += 1
                print(f"高價商品: {product['name']} - ${product['price']:.2f}")

    return count

expensive_count = process_expensive_products(500.0)
print(f"\n高價商品總數: {expensive_count}")

# Count products per category.
def count_by_category():
    """Return a dict mapping each category to its number of products.

    Streams ``large_products.json`` one product at a time.

    Returns:
        ``{category: count}`` in order of first appearance in the file.
    """
    category_count = {}
    # Binary mode: ijson parses bytes; text-mode handles are deprecated input.
    with open('large_products.json', 'rb') as f:
        for product in ijson.items(f, 'products.item'):
            category = product['category']
            category_count[category] = category_count.get(category, 0) + 1

    return category_count

category_stats = count_by_category()
print("\n按類別統計:")
for category, count in category_stats.items():
    print(f"{category}: {count} 個商品")

3. 處理嵌套結構

import ijson

complex_data = '''
{
    "school": {
        "name": "Central High",
        "classes": [
            {
                "grade": "10A",
                "students": [
                    {"name": "Alice", "scores": {"math": 95, "english": 88}},
                    {"name": "Bob", "scores": {"math": 87, "english": 92}}
                ]
            },
            {
                "grade": "10B", 
                "students": [
                    {"name": "Charlie", "scores": {"math": 78, "english": 85}},
                    {"name": "Diana", "scores": {"math": 92, "english": 90}}
                ]
            }
        ]
    }
}
'''

with open('school_data.json', 'w', encoding='utf-8') as f:
    f.write(complex_data)

# Extract every student's math score. The prefix walks
# school -> classes[] -> students[] and yields each student object.
print("學生數學成績:")
# Binary mode: ijson parses bytes; text-mode handles are deprecated input.
with open('school_data.json', 'rb') as f:
    students = ijson.items(f, 'school.classes.item.students.item')
    for student in students:
        print(f"{student['name']}: 數學 {student['scores']['math']}分")

# Extract the scores found at one specific path.
def get_english_scores():
    """Collect ``(student_name, english_score)`` pairs from school_data.json.

    Uses the low-level event stream and matches exact prefixes. A suffix
    match on ``'name'`` would also fire for ``school.name`` and for any key
    that merely ends in "name", which could attach a score to the wrong label.

    Returns:
        A list of ``(name, score)`` tuples in document order. ``name`` is the
        most recently seen student name, or ``None`` if a score appears
        before any name.
    """
    student_prefix = 'school.classes.item.students.item'
    name_prefix = f'{student_prefix}.name'
    english_prefix = f'{student_prefix}.scores.english'

    scores = []
    # Binary mode: ijson parses bytes; text-mode handles are deprecated input.
    with open('school_data.json', 'rb') as f:
        current_student = None

        for prefix, event, value in ijson.parse(f):
            if prefix == name_prefix and event == 'string':
                current_student = value
            elif prefix == english_prefix and event == 'number':
                scores.append((current_student, value))

    return scores

english_scores = get_english_scores()
print("\n英語成績:")
for student, score in english_scores:
    print(f"{student}: {score}分")

4. 錯誤處理和性能優化

import ijson
import json
import time

def safe_json_parsing(file_path):
    """Stream a top-level JSON array from ``file_path`` and report progress.

    Errors are printed rather than raised, so the function is safe to call
    on untrusted or possibly missing files.

    Args:
        file_path: Path to a file whose top-level value is a JSON array.

    Returns:
        None. Progress and error messages are written to stdout.
    """
    try:
        # Binary mode: ijson parses bytes (expected to be UTF-8 JSON).
        with open(file_path, 'rb') as f:
            # Detect an empty file directly instead of relying on the parser
            # yielding no events for empty input.
            if not f.read(1):
                print("文件為空")
                return

            # Rewind so the parser sees the file from the start.
            f.seek(0)

            try:
                count = 0
                for count, _ in enumerate(ijson.items(f, 'item'), 1):
                    # Per-item processing would go here.
                    if count % 1000 == 0:
                        print(f"已處理 {count} 個項目...")

                print(f"總共處理了 {count} 個項目")

            except ijson.JSONError as e:
                print(f"JSON 解析錯誤: {e}")
            except Exception as e:
                # Deliberate best-effort boundary: report instead of crashing.
                print(f"其他錯誤: {e}")

    except FileNotFoundError:
        print(f"文件不存在: {file_path}")
    except UnicodeDecodeError:
        # NOTE(review): with binary-mode input, invalid UTF-8 presumably
        # surfaces as an ijson error above; handler kept as a safety net.
        print("文件編碼錯誤")

# Performance comparison
def performance_comparison():
    """Time ``json.load`` against ijson streaming on a generated array.

    Writes a 10,000-element array to ``test_large.json``, loads it once
    with the standard ``json`` module and once by iterating
    ``ijson.items``, then prints both elapsed times and the item count.
    """
    # Build the test data.
    test_data = [{"id": i, "data": "x" * 100} for i in range(10000)]

    with open('test_large.json', 'w') as f:
        json.dump(test_data, f)

    # Standard json library. perf_counter() is a monotonic, high-resolution
    # clock meant for measuring intervals, unlike time.time().
    start_time = time.perf_counter()
    with open('test_large.json', 'r') as f:
        data = json.load(f)
        count = len(data)
    std_time = time.perf_counter() - start_time

    # ijson streaming. Binary mode: ijson parses bytes, and a text-mode
    # handle would add decoding overhead to the measurement.
    start_time = time.perf_counter()
    with open('test_large.json', 'rb') as f:
        count = 0
        for _ in ijson.items(f, 'item'):
            count += 1
    ijson_time = time.perf_counter() - start_time

    print(f"標準 json 庫: {std_time:.4f} 秒")
    print(f"ijson 流式解析: {ijson_time:.4f} 秒")
    print(f"處理項目數: {count}")

# Run the examples.
if __name__ == "__main__":
    # Create a small sample file: a top-level array of three objects.
    sample_data = [
        {"id": 1, "name": "Item 1", "value": 100},
        {"id": 2, "name": "Item 2", "value": 200},
        {"id": 3, "name": "Item 3", "value": 300},
    ]

    with open('sample.json', 'w') as f:
        json.dump(sample_data, f)

    # Exercise the safe parser.
    safe_json_parsing('sample.json')

    # Run the timing comparison.
    performance_comparison()

實際應用場景

1. 處理 API 響應流

import ijson
import requests

def stream_large_api_response():
    """Stream a large JSON API response and process it item by item.

    The response body is never fully loaded into memory: ijson reads from
    the underlying raw stream and yields each element of the top-level
    ``items`` array as soon as it has been parsed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = "https://api.example.com/large-data"

    # The context manager releases the connection even if parsing fails
    # part-way through the stream.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        # The raw stream is not content-decoded by default; without this a
        # gzip/deflate-compressed body would reach ijson still compressed.
        response.raw.decode_content = True

        # Parse incrementally so memory use stays flat.
        for item in ijson.items(response.raw, 'items.item'):
            process_item(item)

def process_item(item):
    """Print a one-line summary of a single record.

    Missing ``id`` / ``name`` keys fall back to ``'N/A'`` / ``'Unnamed'``.
    """
    item_id = item.get('id', 'N/A')
    item_name = item.get('name', 'Unnamed')
    print(f"處理: {item_id} - {item_name}")

2. 日誌文件分析

import ijson

def analyze_json_logs(log_file_path):
    """Count error and warning entries in a JSON-lines log file.

    Each non-blank line is expected to hold one standalone JSON object
    with a ``level`` field and, for errors, a ``message`` field.

    Args:
        log_file_path: Path to the log file.

    Returns:
        None. Error lines and the final counts are printed to stdout.
    """
    import io  # local import: only this function needs it

    error_count = 0
    warning_count = 0

    # Binary mode: ijson parses bytes.
    with open(log_file_path, 'rb') as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            try:
                # The empty prefix selects the top-level value of the line.
                # The prefix 'item' only matches elements of a top-level
                # array, so it would never yield a line's JSON object.
                entry = next(ijson.items(io.BytesIO(line), ''), None)
            except ijson.JSONError:
                print(f"第 {line_num} 行 JSON 格式錯誤")
                continue

            # Skip lines whose top-level value is not an object.
            if not isinstance(entry, dict):
                continue

            # Tolerate a missing, null or non-string level.
            level = str(entry.get('level') or '').lower()
            if level == 'error':
                error_count += 1
                print(f"錯誤在第 {line_num} 行: {entry.get('message')}")
            elif level == 'warning':
                warning_count += 1

    print(f"\n統計結果:")
    print(f"錯誤數: {error_count}")
    print(f"警告數: {warning_count}")

# Example usage
analyze_json_logs('app_logs.json')

ijson 特別適合處理以下幾種情況：

非常大的 JSON 文件（GB 級別）
網絡流式 JSON 數據
只需要提取部分數據的場景
內存受限的環境

記住在處理完成後及時關閉文件（使用 with 語句即可自動關閉），並在生產環境中添加適當的錯誤處理。

XC_周小董博客

XC_周小董博客

博客 / 詳情

[1346]python ijson用法

文章目錄

安裝

基本概念

基礎用法

1. 基本解析

2. 解析文件中的特定路徑

高級用法

1. 使用事件解析

2. 處理大型 JSON 數組

3. 處理嵌套結構

4. 錯誤處理和性能優化

實際應用場景

1. 處理 API 響應流

2. 日誌文件分析

發佈評論

Product

Company

Support

Company

博客 / 詳情

[1346]python ijson用法

文章目錄

安裝

基本概念

基礎用法

1. 基本解析

2. 解析文件中的特定路徑

高級用法

1. 使用事件解析

2. 處理大型 JSON 數組

3. 處理嵌套結構

4. 錯誤處理和性能優化

實際應用場景

1. 處理 API 響應流

2. 日誌文件分析

發佈評論

發佈評論