# mottery/clean_dlt.py (38 lines, 1.0 KiB, Python)

import pandas as pd
# Load the raw JSON.
# Force 'issue' to str: pandas' default dtype inference would turn
# numeric-looking strings such as "07001" into integers, dropping the
# leading zero before the value is written back out as a string.
df = pd.read_json('dlt_all_data.json', dtype={'issue': str})
def parse_front_balls(s):
    """Parse the front winning numbers into a list of five ints.

    Args:
        s: Whitespace-separated numbers (e.g. ``"01 05 12 23 35"``).
            The value is passed through ``str()`` before splitting.

    Returns:
        The first five tokens converted to ``int``, in input order.
        Tokens beyond the fifth are ignored.

    Raises:
        ValueError: If fewer than five tokens are present, or a token
            cannot be converted to ``int``.
    """
    tokens = str(s).split()[:5]
    # Fail here with a clear message instead of returning a short list
    # that would only surface later as an opaque IndexError.
    if len(tokens) < 5:
        raise ValueError(
            f"expected 5 front balls, got {len(tokens)}: {s!r}"
        )
    return [int(tok) for tok in tokens]
def parse_back_balls(s):
    """Parse the back winning numbers into a list of two ints.

    Args:
        s: Whitespace-separated numbers (e.g. ``"04 11"``).
            The value is passed through ``str()`` before splitting.

    Returns:
        The first two tokens converted to ``int``, in input order.
        Tokens beyond the second are ignored.

    Raises:
        ValueError: If fewer than two tokens are present, or a token
            cannot be converted to ``int``.
    """
    tokens = str(s).split()[:2]
    # Fail here with a clear message instead of returning a short list
    # that would only surface later as an opaque IndexError.
    if len(tokens) < 2:
        raise ValueError(
            f"expected 2 back balls, got {len(tokens)}: {s!r}"
        )
    return [int(tok) for tok in tokens]
# Flatten every draw into one record with a column per ball. Rows that
# cannot be parsed are reported and skipped so one bad row does not
# abort the whole run.
records = []
for _, row in df.iterrows():
    try:
        fronts = parse_front_balls(row['frontWinningNum'])
        backs = parse_back_balls(row['backWinningNum'])
        records.append({
            'issue': str(row['issue']),
            'open_time': str(row['openTime']),
            'front_ball_1': fronts[0],
            'front_ball_2': fronts[1],
            'front_ball_3': fronts[2],
            'front_ball_4': fronts[3],
            'front_ball_5': fronts[4],
            'back_ball_1': backs[0],
            'back_ball_2': backs[1],
        })
    except (KeyError, IndexError, ValueError, TypeError) as e:
        # Use .get() so that a missing 'issue' column cannot raise a second
        # KeyError from inside the handler itself.
        print(f"Error parsing row: {row.get('issue')}, error: {e}")

# Save the cleaned records as a new JSON file; force_ascii=False keeps
# non-ASCII characters unescaped in the output.
pd.DataFrame(records).to_json(
    'dlt_clean.json',
    orient='records',
    force_ascii=False,
)
print('精简后的数据已保存为 dlt_clean.json')