source: rattail/rattail/excel.py @ ae3d120

Last change on this file since ae3d120 was ae3d120, checked in by Lance Edgar <lance@…>, 12 months ago

Misc. flexibility improvements for vendor catalog batch

  • ExcelReader? now should ignore "pseudo-empty" columns
  • add ProductsHandler.get_case_size() method
  • vendor catalog batch input need not contain case sizes (we copy them from product master if not)
  • vendor catalog batch input need not contain vendor item codes either
  • vendor catalog product lookup now tries global default logic first (then falls back to its own traditional logic)
  • generic catalog parser lets handlers do more of the work
  • Property mode set to 100644
File size: 11.6 KB
Line 
1# -*- coding: utf-8; -*-
2################################################################################
3#
4#  Rattail -- Retail Software Framework
5#  Copyright © 2010-2022 Lance Edgar
6#
7#  This file is part of Rattail.
8#
9#  Rattail is free software: you can redistribute it and/or modify it under the
10#  terms of the GNU General Public License as published by the Free Software
11#  Foundation, either version 3 of the License, or (at your option) any later
12#  version.
13#
14#  Rattail is distributed in the hope that it will be useful, but WITHOUT ANY
15#  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16#  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
17#  details.
18#
19#  You should have received a copy of the GNU General Public License along with
20#  Rattail.  If not, see <http://www.gnu.org/licenses/>.
21#
22################################################################################
23"""
24Excel utilities
25"""
26
27from __future__ import unicode_literals, absolute_import
28
29import datetime
30
31import six
32import xlrd
33from xlrd.xldate import xldate_as_tuple
34
35import openpyxl
36from openpyxl.styles import Font, PatternFill
37from openpyxl.cell.cell import get_column_letter
38
39from rattail.util import progress_loop
40
41
42class ExcelReaderXLS(object):
43    """
44    Basic class for reading Excel "legacy" (.xls) files.
45
46    Uses the ``xlrd`` package to read the files.
47    """
48
49    def __init__(self, path, sheet=0, sheet_name=None, header=0,
50                 first_data_row=None,
51                 datefmt='%Y-%m-%d', strip_fieldnames=True):
52        """
53        Constructor; opens an Excel file for reading.
54
55        :param header: Which row should be used as the header, i.e. to
56           determine field (column) names.  This is a zero-based index, so is 0
57           by default (i.e. the first row).
58
59        :param first_data_row: Which is the first row to contain data.  If not
60           specified, it will be assumed that data rows begin immediately after
61           the header row, as defined by :param:`header`.  This again is
62           zero-based, so if the very first row is the true header, but then
63           there is another "header" row also, you might specify a value of
64           ``2`` here, since the 3rd row is the first to contain data.
65        """
66        self.book = xlrd.open_workbook(path)
67        if sheet_name is not None:
68            self.sheet = self.book.sheet_by_name(sheet_name)
69        else:
70            self.sheet = self.book.sheet_by_index(sheet)
71        self.header = header
72        if first_data_row is not None:
73            self.first_data_row = first_data_row
74        else:
75            self.first_data_row = self.header + 1
76        self.fields = self.sheet.row_values(self.header)
77        if strip_fieldnames:
78            self.fields = [field.strip() for field in self.fields]
79        self.datefmt = datefmt
80
81    def sheet_by_name(self, name):
82        return self.book.sheet_by_name(name)
83
84    def read_rows(self, progress=None):
85        rows = []
86
87        def append(row, i):
88            values = self.sheet.row_values(row)
89            data = dict([(self.fields[j], value)
90                         for j, value in enumerate(values)])
91            rows.append(data)
92
93        progress_loop(append, range(self.first_data_row, self.sheet.nrows), progress,
94                      message="Reading data from Excel file")
95        return rows
96
97    def parse_date(self, value, fmt=None):
98        if isinstance(value, float):
99            args = xldate_as_tuple(value, self.book.datemode)
100            return datetime.datetime(*args).date()
101        if value:
102            return datetime.datetime.strptime(value, fmt or self.datefmt).date()
103
104# TODO: this should become a base class and/or wrapper of some sort. for now
105# the "default" reader assumes XLS since apparently that is all we have ever
106# supported until now...which was a surprise to me...
107ExcelReader = ExcelReaderXLS
108
109
110class ExcelReaderXLSX(object):
111    """
112    Basic class for reading Excel 2010 (.xslx) files.
113
114    Uses the `openpyxl`_ package to read the files.
115
116    .. _openpyxl: https://openpyxl.readthedocs.io/en/stable/
117
118    :param path: Path to the Excel data file.
119
120    :param header_row: 1-based row number which contains the header,
121       with field names.
122
123    :param strip_fieldnames: If true (the default), any whitespace
124       surrounding the field names will be stripped, i.e. after they
125       are read from the header row.  Pass ``False`` here to suppress
126       the behavior and leave whitespace intact.
127    """
128
129    def __init__(self, path, header_row=1, strip_fieldnames=True,
130                 **kwargs):
131        """
132        Constructor; opens an Excel file for reading.
133
134        :param path: Path to the Excel file.
135
136        :param header_row: Which row contains the column headers.  This is
137           1-based, so the 1 is the default (i.e. the first row).
138        """
139        # nb. after much use with no problems, eventually did come
140        # across a spreadsheet which contained formula instead of
141        # values for certain cells.  so now using ``data_only=True``
142        # to avoid the formula, hopefully nothing else breaks..
143        self.book = openpyxl.load_workbook(filename=path, data_only=True)
144        self.sheet = self.book.active
145
146        self.header_row = header_row
147
148        self.fields = None
149        # TODO: this seems like a hacky way to get the header fields?  we are
150        # "iterating" over the single header row, effectively...
151        for row in self.sheet.iter_rows(min_row=self.header_row,
152                                        max_row=self.header_row,
153                                        values_only=True):
154            assert self.fields is None
155            self.fields = [field for field in row
156                           if field is not None]
157
158        if strip_fieldnames:
159            self.fields = [field.strip() for field in self.fields]
160
161    def iter_rows(self):
162        return self.sheet.iter_rows(min_row=self.header_row + 1,
163                                    values_only=True)
164
165    def read_rows(self, progress=None):
166        fieldcount = len(self.fields)
167        rows = []
168
169        def append(row, i):
170            data = {}
171            for j, value in enumerate(row):
172                if j < fieldcount:
173                    data[self.fields[j]] = value
174            rows.append(data)
175
176        xlrows = list(self.iter_rows())
177        progress_loop(append, xlrows, progress,
178                      message="Reading data from Excel file")
179        return rows
180
181    # TODO: this is here just for method signature compatibility with
182    # the older xlrd-based reader above.  maybe should just deprecate
183    # and/or remove it though
184    def parse_date(self, value, fmt=None):
185        return value
186
187
188class ExcelWriter(object):
189    """
190    Base class for Excel writers.
191    """
192
193    def __init__(self, path, fields, sheet_title=None, number_formats={}):
194        """
195        Constructor; opens an Excel workbook for writing.
196        """
197        self.path = path
198        self.fields = fields
199        self.book = openpyxl.Workbook()
200        self.sheet = self.book.active
201        if sheet_title:
202            self.sheet.title = sheet_title
203        self.number_formats = number_formats
204
205    def create_sheet(self, title):
206        """
207        Create a new sheet in the workbook, and make it active.
208        """
209        self.sheet = self.book.create_sheet(title)
210        return self.sheet
211
212    def enable_grid_lines(self, sheet=None, enabled=True):
213        if not sheet:
214            sheet = self.sheet
215        sheet.sheet_view.showGridLines = enabled
216
217    def disable_grid_lines(self, sheet=None):
218        self.enable_grid_lines(sheet=sheet, enabled=False)
219
220    def write_header(self, labels=None):
221        font = Font(bold=True)
222        for i, field in enumerate(self.fields, 1):
223            value = field
224            if labels and field in labels:
225                value = labels[field]
226            cell = self.sheet.cell(row=1, column=i, value=value)
227            cell.font = font
228
229    def write_row(self, data, row=None):
230        """
231        Write (append) a single data row to the current sheet.
232
233        :param row: The 1-based row number to which data should be written.
234        """
235        if row is None:
236            raise NotImplementedError("should be able to detect 'next' row here?")
237
238        self.sheet.append(data)
239
240        # apply number formats
241        if self.number_formats:
242            for col, field in enumerate(self.fields, 1):
243                if field in self.number_formats:
244                    cell = self.sheet.cell(row=row, column=col)
245                    cell.number_format = self.number_formats[field]
246
247        # apply row highlighting
248        if row % 2 == 0:
249            fill_even = PatternFill(patternType='solid',
250                                    fgColor='d9d9d9',
251                                    bgColor='d9d9d9')
252            for col, field in enumerate(self.fields, 1):
253                cell = self.sheet.cell(row=row, column=col)
254                cell.fill = fill_even
255
256    def write_rows(self, rows, progress=None):
257        """
258        Write (append) a sequence of data rows to the current sheet.
259        """
260        def write(data, i):
261            # must add 1 to account for header
262            self.write_row(data, row=i + 1)
263
264        progress_loop(write, rows, progress,
265                      message="Generating cells for spreadsheet")
266
267    def auto_freeze(self, row=2, column=1):
268        """
269        Freeze sheet per "the usual"
270        """
271        self.sheet.freeze_panes = self.sheet.cell(row=row, column=column)
272
273    def auto_filter(self):
274        """
275        Add auto filters for all columns.
276        """
277        first = self.sheet.cell(row=1, column=1)
278        last = self.sheet.cell(row=self.sheet.max_row, column=self.sheet.max_column)
279        cellrange = '{}:{}'.format(first.coordinate, last.coordinate)
280        self.sheet.auto_filter.ref = cellrange
281
282    def auto_resize(self, progress=None):
283        """
284        (Try to) Auto-resize all data columns.
285        """
286        # note, some of the below uses efficiency tricks from these docs
287        # https://openpyxl.readthedocs.io/en/stable/tutorial.html#accessing-many-cells
288
289        # we must calculate desired column widths.  but for sake of progress,
290        # we'll iterate through rows instead of columns, to do that.  (this is
291        # just to give a higher total for the progress bar.)  so the first pass
292        # is really just to cache all existing string widths.
293        cached = []
294
295        def cache(row, i):
296            cached.append([len(six.text_type(value))
297                           for value in row])
298
299        progress_loop(cache, list(self.sheet.values), progress,
300                      message="Calculating all string widths")
301
302        # okay, now can determine ideal widths
303        column_widths = []
304
305        def calculate(col, i):
306            width = max([row_widths[col] for row_widths in cached])
307            column_widths.append(width or 5)
308
309        progress_loop(calculate, range(self.sheet.max_column), progress,
310                      message="Calculating desired column widths")
311
312        # resize columns
313        for i, width in enumerate(column_widths, 1):
314            self.sheet.column_dimensions[get_column_letter(i)].width = width + 3
315
316    def save(self, progress=None):
317        """
318        Save the Excel workbook to file.  If ``progress`` is provided, it will
319        be used in a hacky sort of way, i.e. from 0 to 1 only since we have no
320        way of knowing true progress for the save operation.  (But it can still
321        be nice to let user know this is the step we're on at least.)
322        """
323        def save(x, i):
324            self.book.save(self.path)
325
326        progress_loop(save, range(1), progress,
327                      message="Saving workbook to file")
Note: See TracBrowser for help on using the repository browser.