# mottery/clean_ssq.py
#
# 38 lines
# 1021 B
# Python

import pandas as pd
# Read the raw JSON into a DataFrame. The row loop further down reads the
# 'issue', 'openTime', 'frontWinningNum' and 'backWinningNum' fields from it.
df = pd.read_json('ssq_all_data.json')
def parse_red_balls(s):
    """Parse the first six whitespace-separated integers from *s*.

    Args:
        s: Value holding the red-ball numbers. It is converted with ``str()``
            and split on whitespace, e.g. ``"01 02 03 04 05 06"``.

    Returns:
        A list of exactly six ints, in the order they appear in *s*.
        Tokens after the sixth are ignored.

    Raises:
        ValueError: If *s* yields fewer than six tokens, or if one of the
            first six tokens is not an integer literal.
    """
    tokens = str(s).split()[:6]
    # Fail here with a clear message instead of handing back a short list
    # that only blows up later as "list index out of range" at the call site.
    if len(tokens) < 6:
        raise ValueError(
            f"expected 6 red ball numbers, got {len(tokens)}: {s!r}"
        )
    return [int(token) for token in tokens]
def parse_blue_ball(s):
    """Parse the first whitespace-separated integer from *s*.

    Args:
        s: Value holding the blue-ball number. It is converted with
            ``str()`` and split on whitespace, e.g. ``"07"``.

    Returns:
        The first token of *s* as an int. Any further tokens are ignored.

    Raises:
        ValueError: If *s* contains no tokens, or if the first token is not
            an integer literal.
    """
    tokens = str(s).split()
    # An empty/blank value would otherwise surface as a bare IndexError.
    if not tokens:
        raise ValueError(f"no blue ball number found in {s!r}")
    return int(tokens[0])
# Flatten every raw row into one flat record: the issue, the open time,
# six red-ball columns and one blue-ball column.
records = []
for _, row in df.iterrows():
    try:
        reds = parse_red_balls(row['frontWinningNum'])
        blue = parse_blue_ball(row['backWinningNum'])
        record = {
            'issue': str(row['issue']),
            'open_time': str(row['openTime']),
            'red_ball_1': reds[0],
            'red_ball_2': reds[1],
            'red_ball_3': reds[2],
            'red_ball_4': reds[3],
            'red_ball_5': reds[4],
            'red_ball_6': reds[5],
            'blue_ball': blue,
        }
    except Exception as e:
        # Best effort: report the bad row and carry on with the rest.
        # ``row.get`` is used so that a row lacking 'issue' does not raise a
        # second, uncaught KeyError from inside this handler.
        print(f"Error parsing row: {row.get('issue')}, error: {e}")
    else:
        records.append(record)

# Save the cleaned records as a new JSON file. force_ascii=False writes
# non-ASCII characters as-is instead of \u escapes.
pd.DataFrame(records).to_json(
    'ssq_clean.json', orient='records', force_ascii=False
)
print('精简后的数据已保存为 ssq_clean.json')