import sys, os
import gc
import copy
import numpy as np
import pandas as pd
import random
import time
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
# copy_excel_format
from copy_excel_format import *
gc.collect()
20
# Seed both random number generators (stdlib and NumPy) so the sample data
# generated below is reproducible.
random.seed(57)
np.random.seed(57)

# Directories, relative to the current working directory.
input_path = '../input/'
output_path = '../output/'
interm_path = '../intermediate/'

# Input files: the Excel template and the CSV holding the header rows.
input_template_excel_path = f'{input_path}input_template_excel_sample.xlsx'
input_header_csv_path = f'{input_path}input_header_df_sample.csv'

# Load the header rows and show the first few.
header_df = pd.read_csv(input_header_csv_path)
header_df.head()
No. | date | col1 | col2 | col3 | col4 | col5 | col6 | |
---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | name: <name> | NaN | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | No. | date | col1 | col2 | col3_4_5 | NaN | NaN | col6 |
def get_sample_df(n_rows=10, header_df=header_df):
    """
    Build a sample DataFrame: header rows followed by random data rows.

    Args:
        n_rows: int, optional(default=10)
            Number of rows in the data part of the DataFrame.
        header_df: pandas.DataFrame
            Header part of the DataFrame. It is not modified. The default
            is the module-level ``header_df`` as bound when this function
            was defined.

    Returns:
        pandas.DataFrame
            The header rows (with the ``'name: <name>'`` placeholder filled
            in with a randomly chosen name) stacked on top of the data rows.
            Row labels of both parts are kept as they are, so index values
            repeat between the header part and the data part.
    """
    col1_samples = ['hoge', 'fuga', 'poyo', 'gray', None]

    # Data part. The columns are generated in a fixed order so that, for a
    # given seed, each RNG (stdlib `random` and `np.random`) is consumed in
    # the same sequence on every run.
    sample_df = pd.DataFrame()
    sample_df['No.'] = range(1, n_rows + 1)
    sample_df['date'] = [datetime(2020, 11, 20) + relativedelta(days=jj * 7) for jj in range(n_rows)]
    sample_df['col1'] = random.choices(col1_samples, k=n_rows)
    sample_df['col2'] = np.random.randint(0, 10, size=n_rows)
    sample_df['col3'] = np.random.randint(100, 200, size=n_rows)
    sample_df['col4'] = random.choices(col1_samples, k=n_rows)
    sample_df['col5'] = np.random.randint(0, 10, size=n_rows)
    sample_df['col6'] = np.random.randint(100, 200, size=n_rows)

    # Attach the header. `replace` returns a new DataFrame, so the caller's
    # header_df is left untouched without an explicit copy.
    tmp_name = random.choice(['hoge', 'fuga', 'poyo'])
    tmp_header_df = header_df.replace('name: <name>', 'name: ' + tmp_name)

    # DataFrame.append was removed in pandas 2.0; pd.concat with its default
    # arguments stacks the rows the same way and keeps the original index.
    return pd.concat([tmp_header_df, sample_df])
# Build one sample DataFrame with the default arguments and display its
# first three rows.
sample_df = get_sample_df()
sample_df.head(3)
No. | date | col1 | col2 | col3 | col4 | col5 | col6 | |
---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | name: poyo | NaN | NaN | NaN | NaN | NaN | NaN |
2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# Display the last rows of the sample DataFrame.
sample_df.tail()
No. | date | col1 | col2 | col3 | col4 | col5 | col6 | |
---|---|---|---|---|---|---|---|---|
5 | 6 | 2020-12-25 00:00:00 | fuga | 8 | 158 | gray | 2 | 133 |
6 | 7 | 2021-01-01 00:00:00 | None | 8 | 101 | gray | 8 | 199 |
7 | 8 | 2021-01-08 00:00:00 | gray | 1 | 198 | gray | 5 | 137 |
8 | 9 | 2021-01-15 00:00:00 | None | 7 | 148 | poyo | 2 | 130 |
9 | 10 | 2021-01-22 00:00:00 | hoge | 9 | 101 | None | 0 | 123 |
# Number of DataFrames to generate. The number of sheets is the same.
n_df = 10

# Build the list of CopyExcelInfoHolder instances, one per DataFrame.
ceih_list = []
for ii in range(n_df):
    # Sheet names are zero-padded to three digits: 'sheet001', 'sheet002', ...
    tmp_sheet_name = f'sheet{ii + 1:03d}'
    # Each DataFrame gets a random number of data rows drawn from [10, 28).
    tmp_df = get_sample_df(n_rows=np.random.randint(10, 28))
    ceih = CopyExcelInfoHolder(
        template_excel_path=input_template_excel_path,
        template_sheet_name='blank_template',
        output_sheet_name=tmp_sheet_name,
        df=tmp_df,
    )
    ceih_list.append(ceih)
    del ceih
    gc.collect()

# Display the first three CopyExcelInfoHolder instances in ceih_list.
ceih_list[:3]
[<copy_excel_format.excel_module.CopyExcelInfoHolder at 0x1e42f9d5fa0>, <copy_excel_format.excel_module.CopyExcelInfoHolder at 0x1e42ffb9880>, <copy_excel_format.excel_module.CopyExcelInfoHolder at 0x1e42ffb98b0>]
# Show the template_excel_path of the first three entries of ceih_list.
# A plain loop is used because print() is called only for its side effect;
# a list comprehension would build (and the notebook would echo) a useless
# list of None values.
for holder in ceih_list[:3]:
    print('template_excel_path:{}'.format(holder.template_excel_path))
template_excel_path:../input/input_template_excel_sample.xlsx template_excel_path:../input/input_template_excel_sample.xlsx template_excel_path:../input/input_template_excel_sample.xlsx
[None, None, None]
# Show the template_sheet_name of the first three entries of ceih_list.
# A plain loop is used because print() is called only for its side effect;
# a list comprehension would build (and the notebook would echo) a useless
# list of None values.
for holder in ceih_list[:3]:
    print('template_sheet_name:{}'.format(holder.template_sheet_name))
template_sheet_name:blank_template template_sheet_name:blank_template template_sheet_name:blank_template
[None, None, None]
# Show the output_sheet_name of the first three entries of ceih_list.
# A plain loop is used because print() is called only for its side effect;
# a list comprehension would build (and the notebook would echo) a useless
# list of None values.
for holder in ceih_list[:3]:
    print('output_sheet_name:{}'.format(holder.output_sheet_name))
output_sheet_name:sheet001 output_sheet_name:sheet002 output_sheet_name:sheet003
[None, None, None]
# Show the first three rows of the DataFrame held by each of the first three
# entries of ceih_list, framed by lines of asterisks.
# A plain loop is used because print() is called only for its side effect;
# a list comprehension would build (and the notebook would echo) a useless
# list of None values.
for holder in ceih_list[:3]:
    print('*' * 80 + '\ndf.head(3):{}'.format(holder.df.head(3)) + '\n' + '*' * 80 + '\n\n')
******************************************************************************** df.head(3): No. date col1 col2 col3 col4 col5 col6 0 NaN NaN NaN NaN NaN NaN NaN NaN 1 NaN name: poyo NaN NaN NaN NaN NaN NaN 2 NaN NaN NaN NaN NaN NaN NaN NaN ******************************************************************************** ******************************************************************************** df.head(3): No. date col1 col2 col3 col4 col5 col6 0 NaN NaN NaN NaN NaN NaN NaN NaN 1 NaN name: fuga NaN NaN NaN NaN NaN NaN 2 NaN NaN NaN NaN NaN NaN NaN NaN ******************************************************************************** ******************************************************************************** df.head(3): No. date col1 col2 col3 col4 col5 col6 0 NaN NaN NaN NaN NaN NaN NaN NaN 1 NaN name: poyo NaN NaN NaN NaN NaN NaN 2 NaN NaN NaN NaN NaN NaN NaN NaN ********************************************************************************
[None, None, None]
# Show the last three rows of the DataFrame held by each of the first three
# entries of ceih_list, framed by lines of asterisks.
# A plain loop is used because print() is called only for its side effect;
# a list comprehension would build (and the notebook would echo) a useless
# list of None values.
for holder in ceih_list[:3]:
    print('*' * 80 + '\ndf.tail(3):{}'.format(holder.df.tail(3)) + '\n' + '*' * 80 + '\n\n')
******************************************************************************** df.tail(3): No. date col1 col2 col3 col4 col5 col6 12 13 2021-02-12 00:00:00 None 6 147 fuga 6 157 13 14 2021-02-19 00:00:00 fuga 6 126 fuga 2 155 14 15 2021-02-26 00:00:00 gray 5 109 gray 9 143 ******************************************************************************** ******************************************************************************** df.tail(3): No. date col1 col2 col3 col4 col5 col6 18 19 2021-03-26 00:00:00 None 4 115 hoge 6 192 19 20 2021-04-02 00:00:00 hoge 0 134 None 2 134 20 21 2021-04-09 00:00:00 poyo 1 127 gray 0 194 ******************************************************************************** ******************************************************************************** df.tail(3): No. date col1 col2 col3 col4 col5 col6 11 12 2021-02-05 00:00:00 gray 8 187 hoge 0 190 12 13 2021-02-12 00:00:00 fuga 0 100 gray 8 161 13 14 2021-02-19 00:00:00 fuga 0 159 None 1 198 ********************************************************************************
[None, None, None]
# Print how many CopyExcelInfoHolder entries ceih_list contains.
print(len(ceih_list))
10
# Output workbook path passed to copy_excel_format below; displayed for checking.
output_excel_path = f'{output_path}output_excel_sample.xlsx'
output_excel_path
'../output/output_excel_sample.xlsx'
# Record the start time so the elapsed time of the run can be reported.
start = time.time()
# Run the copy_excel_format function on every entry of ceih_list, with
# output_excel_path as the destination argument.
# NOTE(review): the cef_*, write_* and copy_values options are defined by the
# copy_excel_format package, not in this file — confirm their meaning there.
copy_excel_format(
ceih_list = ceih_list,
output_excel_path = output_excel_path,
cef_manual_set_rows = None,
cef_force_dimension_copy = False,
cef_debug_mode = True,
write_index = False,
write_header = False,
copy_values = False
)
******************************************************************************** sheet name: sheet001 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.4 s to copy format end. elapsed time: 7.4 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet001 ******************************************************************************** ******************************************************************************** sheet name: sheet002 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.7 s to copy format end. elapsed time: 7.8 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet002 ******************************************************************************** ******************************************************************************** sheet name: sheet003 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.6 s to copy format end. elapsed time: 7.6 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet003 ******************************************************************************** ******************************************************************************** sheet name: sheet004 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. 
elapsed time: 7.6 s to copy format end. elapsed time: 7.6 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet004 ******************************************************************************** ******************************************************************************** sheet name: sheet005 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.8 s to copy format end. elapsed time: 7.8 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet005 ******************************************************************************** ******************************************************************************** sheet name: sheet006 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.9 s to copy format end. elapsed time: 7.9 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet006 ******************************************************************************** ******************************************************************************** sheet name: sheet007 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.6 s to copy format end. 
elapsed time: 7.6 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet007 ******************************************************************************** ******************************************************************************** sheet name: sheet008 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.4 s to copy format end. elapsed time: 7.4 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet008 ******************************************************************************** ******************************************************************************** sheet name: sheet009 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.7 s to copy format end. elapsed time: 7.8 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet009 ******************************************************************************** ******************************************************************************** sheet name: sheet010 to write df to sheet end. elapsed time: 0.0 s to copy cell format end. elapsed time: 7.6 s to copy format end. 
elapsed time: 7.6 s ******************************************************************************** ******************************************************************************** template_excel_path:../input/input_template_excel_sample.xlsx template_sheet_name:blank_template output_sheet_name:sheet010 ********************************************************************************
# Report the time elapsed since `start` (the recorded output shows it both
# printed and returned, in seconds).
get_elapsed_time(start)
elapsed time: 77.1 s
77.09773564338684
# Print the CPU count reported by the OS.
print(f'cpu_count:{os.cpu_count()}')
cpu_count:8
# Output workbook path passed to copy_excel_format_parallel below; displayed
# for checking.
output_excel_path = f'{output_path}output_excel_sample_parallel001.xlsx'
output_excel_path
'../output/output_excel_sample_parallel001.xlsx'
# Directory passed to the parallel functions as tmp_output_excel_dir_path;
# displayed for checking.
tmp_output_excel_dir_path = f'{interm_path}tmp_output_excel/'
tmp_output_excel_dir_path
'../intermediate/tmp_output_excel/'
# Record the start time so the elapsed time of the run can be reported.
start = time.time()
# Run the parallel version of the copy_excel_format function.
# NOTE(review): every option below is defined by the copy_excel_format
# package, not in this file. Presumably n_jobs = None lets the library choose
# the number of workers and del_tmp_dir = True removes
# tmp_output_excel_dir_path afterwards — confirm against the package.
copy_excel_format_parallel(
ceih_list = ceih_list,
output_excel_path = output_excel_path,
tmp_output_excel_dir_path = tmp_output_excel_dir_path,
parallel_method = 'multiprocess',
n_jobs = None,
copy_sheet_method = 'xlwings',
sorted_sheet_names_list = None,
del_tmp_dir = True,
n_seconds_to_sleep = 1,
cef_manual_set_rows = None,
cef_force_dimension_copy = False,
cef_debug_mode = True,
write_index = False,
write_header = False,
copy_values = False
)
# Report the time elapsed since `start`.
get_elapsed_time(start)
elapsed time: 89.0 s
89.00972938537598
# Output workbook path passed to copy_excel_format_from_temporary_files
# below; displayed for checking.
output_excel_path = f'{output_path}output_excel_sample_parallel002.xlsx'
output_excel_path
'../output/output_excel_sample_parallel002.xlsx'
# Directory passed to the two-step parallel functions as
# tmp_output_excel_dir_path; displayed for checking.
tmp_output_excel_dir_path = f'{interm_path}tmp_output_excel/'
tmp_output_excel_dir_path
'../intermediate/tmp_output_excel/'
# Record the start time so the elapsed time of both steps can be reported.
start = time.time()
# Step 1: run in parallel and write temporary, already-formatted Excel files.
# NOTE(review): the options below are defined by the copy_excel_format
# package, not in this file — confirm their meaning there.
output_temporary_excel_parallel(
ceih_list = ceih_list,
tmp_output_excel_dir_path = tmp_output_excel_dir_path,
parallel_method = 'multiprocess',
n_jobs = None,
cef_manual_set_rows = None,
cef_force_dimension_copy = False,
cef_debug_mode = True,
write_index = False,
write_header = False,
copy_values = False
)
# Step 2: merge the temporary Excel files into a single Excel file that has
# multiple sheets.
copy_excel_format_from_temporary_files(
ceih_list = ceih_list,
output_excel_path = output_excel_path,
tmp_output_excel_dir_path = tmp_output_excel_dir_path,
copy_sheet_method = 'xlwings',
sorted_sheet_names_list = None,
del_tmp_dir = True,
n_seconds_to_sleep = 1
)
# Report the time elapsed since `start`.
get_elapsed_time(start)
elapsed time: 84.0 s
84.04134583473206