source: rattail/rattail/batch/handlers.py @ 396a48e

Last change on this file since 396a48e was 396a48e, checked in by Lance Edgar <ledgar@…>, 10 months ago

Add mark_complete() and mark_incomplete() methods for batch handler

  • Property mode set to 100644
File size: 22.7 KB
# -*- coding: utf-8; -*-
################################################################################
#
#  Rattail -- Retail Software Framework
#  Copyright © 2010-2018 Lance Edgar
#
#  This file is part of Rattail.
#
#  Rattail is free software: you can redistribute it and/or modify it under the
#  terms of the GNU General Public License as published by the Free Software
#  Foundation, either version 3 of the License, or (at your option) any later
#  version.
#
#  Rattail is distributed in the hope that it will be useful, but WITHOUT ANY
#  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
#  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
#  details.
#
#  You should have received a copy of the GNU General Public License along with
#  Rattail.  If not, see <http://www.gnu.org/licenses/>.
#
################################################################################
"""
Data Batch Handlers
"""

from __future__ import unicode_literals, absolute_import

import os
import shutil
import datetime
import warnings

from sqlalchemy import orm

from rattail.core import Object
from rattail.db.cache import cache_model
from rattail.time import localtime, make_utc
from rattail.util import progress_loop, load_object


class BatchHandler(object):
    """
    Base class and partial default implementation for batch handlers.  It is
    expected that all batch handlers will ultimately inherit from this base
    class; it therefore defines the implementation "interface", loosely
    speaking.  Custom batch handlers are welcome to supplement or override this
    as needed, and in fact must do so for certain aspects.

    .. attribute:: populate_batches

       Simple flag to indicate whether any/all batches being handled will
       require initial population from a relevant data source.  Note that this
       flag should be set to ``True`` if *any* batches may need population.
       Whether or not a given batch actually needs to be populated is
       ultimately determined by the :meth:`should_populate()` method.

    .. attribute:: populate_with_versioning

       This flag indicates whether it's okay for data versioning to be enabled
       during initial batch population.

       If set to ``True`` (the default), then versioning is allowed and
       therefore the caller need take no special precautions when populating
       the batch.

       If set to ``False`` then versioning is *not* allowed; if versioning is
       not enabled for the current process, the caller may populate the batch
       with no special precautions.  However if versioning *is* enabled, the
       caller must launch a separate process with versioning disabled, in order
       to populate the batch.

    .. attribute:: refresh_with_versioning

       This flag indicates whether it's okay for data versioning to be enabled
       during batch refresh.

       If set to ``True`` (the default), then versioning is allowed and
       therefore the caller need take no special precautions when refreshing
       the batch.

       If set to ``False`` then versioning is *not* allowed; if versioning is
       not enabled for the current process, the caller may refresh the batch
       with no special precautions.  However if versioning *is* enabled, the
       caller must launch a separate process with versioning disabled, in order
       to refresh the batch.

    .. attribute:: execute_with_versioning

       This flag indicates whether it's okay for data versioning to be enabled
       during batch execution.

       If set to ``True`` (the default), then versioning is allowed and
       therefore the caller need take no special precautions when executing
       the batch.

       If set to ``False`` then versioning is *not* allowed; if versioning is
       not enabled for the current process, the caller may execute the batch
       with no special precautions.  However if versioning *is* enabled, the
       caller must launch a separate process with versioning disabled, in order
       to execute the batch.

    .. attribute:: repopulate_when_refresh

       Flag to indicate that when a batch is refreshed, the first step of that
       refresh should be to re-populate the batch.  The flag is ``False`` by
       default, in which case the batch is *not* repopulated, i.e. the refresh
       will work with existing batch rows.
    """
    populate_batches = False
    populate_with_versioning = True

    refresh_with_versioning = True
    repopulate_when_refresh = False

    execute_with_versioning = True

    def __init__(self, config):
        self.config = config
        self.enum = config.get_enum()

    @property
    def batch_model_class(self):
        """
        Reference to the data model class of the batch type for which this
        handler is responsible, e.g. :class:`rattail.db.model.LabelBatch`.
        Each handler must define this (or inherit from one that does).
        """
        raise NotImplementedError("You must set the 'batch_model_class' attribute "
                                  "for class '{}'".format(self.__class__.__name__))

    @property
    def batch_key(self):
        """
        The "batch type key" for the handler, e.g. ``'labels'``.  This isn't
        necessarily unique among handlers, but instead refers to a unique key
        for the type of batch being handled.  The handler needn't define this,
        as it is borrowed from :attr:`batch_model_class`.
        """
        return self.batch_model_class.batch_key

    def get_model_title(self):
        return self.batch_model_class.get_model_title()

    def allow_versioning(self, action):
        if action == 'populate':
            return self.populate_with_versioning
        if action == 'refresh':
            return self.refresh_with_versioning
        if action == 'execute':
            return self.execute_with_versioning
        raise NotImplementedError("unknown batch action: {}".format(action))

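    # As a rough sketch of how the versioning flags above are meant to be
    # consulted (the calling layer and ``versioning_enabled`` flag are assumed
    # here, not defined by this module):
    #
    #     handler = get_batch_handler(config, 'labels')
    #     if versioning_enabled and not handler.allow_versioning('populate'):
    #         # must re-launch in a separate process with versioning disabled
    #         ...
    #     else:
    #         handler.do_populate(batch, user, progress=progress)
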
    def make_basic_batch(self, session, progress=None, **kwargs):
        """
        Make a new "basic" batch, with no customization beyond what is provided
        by ``kwargs``, which are passed directly to the batch class constructor.
        """
        kwargs.setdefault('rowcount', 0)
        kwargs.setdefault('complete', False)
        batch = self.batch_model_class(**kwargs)
        session.add(batch)
        session.flush()
        return batch

    def make_batch(self, session, progress=None, **kwargs):
        """
        Make a new batch, with initial rows if applicable.
        """
        batch = self.make_basic_batch(session, progress=progress, **kwargs)
        self.init_batch(batch, progress=progress, **kwargs)
        return batch

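    # A minimal usage sketch; the kwargs go straight to the batch constructor,
    # so the exact fields available (``created_by`` is shown only as an
    # assumption) depend on the batch model in question:
    #
    #     handler = get_batch_handler(config, 'labels')
    #     batch = handler.make_batch(session, created_by=user)
    #     if handler.should_populate(batch):
    #         handler.do_populate(batch, user)
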
    def init_batch(self, batch, progress=None, **kwargs):
        """
        Initialize the batch in whatever way might make sense.  Whether this is
        required at all is up to the batch handler etc.
        """

    def add_row(self, batch, row):
        """
        (Try to) Add the given row to the given batch.  This assumes it is a
        *new* row, i.e. one which has not yet been added to any batch.
        """
        session = orm.object_session(batch)
        with session.no_autoflush:
            batch.data_rows.append(row)
            self.refresh_row(row)
        if not row.removed:
            batch.rowcount += 1

    def purge_batches(self, session, before=None, before_days=90,
                      delete_all_data=True, progress=None, **kwargs):
        """
        Purge all batches which were executed prior to a given date.

        :param before: If provided, must be a timezone-aware datetime object.
           If not provided, it will be calculated from the current date, using
           ``before_days``.

        :param before_days: Number of days before the current date, to be used
           as the cutoff date if ``before`` is not specified.

        :param delete_all_data: Flag indicating whether *all* data should be
           deleted for each batch being purged.  This flag is passed along to
           :meth:`delete()`; see that for more info.

        :returns: Integer indicating the number of batches purged.
        """
        if not before:
            before = localtime(self.config).date() - datetime.timedelta(days=before_days)
            before = datetime.datetime.combine(before, datetime.time(0))
            before = localtime(self.config, before)

        old_batches = session.query(self.batch_model_class)\
                             .filter(self.batch_model_class.executed < before)\
                             .options(orm.joinedload(self.batch_model_class.data_rows))
        result = Object()
        result.purged = 0

        def purge(batch, i):
            self.delete(batch, delete_all_data=delete_all_data, progress=progress)
            session.delete(batch)
            result.purged += 1
            if i % 5 == 0:
                session.flush()

        self.progress_loop(purge, old_batches, progress,
                           message="Purging old batches")

        session.flush()
        return result.purged

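    # A hedged sketch of a typical purge, e.g. from a nightly maintenance task
    # (``log`` is a hypothetical module-level logger, not defined in this file):
    #
    #     handler = get_batch_handler(config, 'labels')
    #     purged = handler.purge_batches(session, before_days=90)
    #     log.info("purged %s batches executed more than 90 days ago", purged)
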
    @property
    def root_datadir(self):
        """
        The absolute path of the root folder in which data for this particular
        type of batch is stored.  The structure of this path is as follows:

        .. code-block:: none

           /{root_batch_data_dir}/{batch_type_key}

        * ``{root_batch_data_dir}`` - Value of the 'batch.files' option in the
          [rattail] section of the config file.
        * ``{batch_type_key}`` - Unique key for the type of batch it is.

        .. note::
           While it is likely that the data folder returned by this method
           already exists, this method does not guarantee it.
        """
        return self.config.batch_filedir(self.batch_key)

    def datadir(self, batch):
        """
        Returns the absolute path of the folder in which the batch's source
        data file(s) resides.  Note that the batch must already have been
        persisted to the database.  The structure of the path returned is as
        follows:

        .. code-block:: none

           /{root_datadir}/{uuid[:2]}/{uuid[2:]}

        * ``{root_datadir}`` - Value returned by :meth:`root_datadir()`.
        * ``{uuid[:2]}`` - First two characters of batch UUID.
        * ``{uuid[2:]}`` - All batch UUID characters *after* the first two.

        .. note::
           While it is likely that the data folder returned by this method
           already exists, this method does not guarantee any such thing.  It
           is typically assumed that the path will have been created by a
           previous call to :meth:`make_batch()` however.
        """
        return os.path.join(self.root_datadir, batch.uuid[:2], batch.uuid[2:])

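    # For illustration only: assuming a batch whose UUID begins with 'abc123'
    # and a root data dir of '/srv/rattail/batch/labels' (both hypothetical),
    # datadir() would return something like:
    #
    #     /srv/rattail/batch/labels/ab/c123...
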
    def make_datadir(self, batch):
        """
        Returns the data folder specific to the given batch, creating it if
        necessary.
        """
        datadir = self.datadir(batch)
        os.makedirs(datadir)
        return datadir

    # TODO: remove default attr?
    def set_input_file(self, batch, path, attr='filename'):
        """
        Assign the data file found at ``path`` to the batch.  This overwrites
        the given attribute (``attr``) of the batch and places a copy of the
        data file in the batch's data folder.
        """
        datadir = self.make_datadir(batch)
        filename = os.path.basename(path)
        shutil.copyfile(path, os.path.join(datadir, filename))
        setattr(batch, attr, filename)

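    # A brief sketch of attaching a source file to a new batch; the file path
    # is hypothetical:
    #
    #     batch = handler.make_batch(session, created_by=user)
    #     handler.set_input_file(batch, '/tmp/label-data.csv')
    #     # a copy now lives in datadir(batch), and batch.filename is set
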
    def should_populate(self, batch):
        """
        Must return a boolean indicating whether the given batch should be
        populated from an initial data source, i.e. at time of batch creation.
        Override this method if you need to inspect the batch in order to
        determine whether the populate step is needed.  Default behavior is to
        simply return the value of :attr:`populate_batches`.
        """
        return self.populate_batches

    def setup_populate(self, batch, progress=None):
        """
        Perform any setup (caching etc.) necessary for populating a batch.
        """

    def teardown_populate(self, batch, progress=None):
        """
        Perform any teardown (cleanup etc.) necessary after populating a batch.
        """

    def do_populate(self, batch, user, progress=None):
        self.setup_populate(batch, progress=progress)
        self.populate(batch, progress=progress)
        self.teardown_populate(batch, progress=progress)
        self.refresh_batch_status(batch)
        return True

    def populate(self, batch, progress=None):
        """
        Populate the batch with initial data rows.  It is assumed that the data
        source to be used will be known by inspecting various properties of the
        batch itself.
        """
        raise NotImplementedError("Please implement `{}.populate()` method".format(batch.__class__.__name__))

    def refreshable(self, batch):
        """
        This method should return a boolean indicating whether or not the
        handler supports a "refresh" operation for the batch, given its current
        condition.  The default assumes a refresh is allowed unless the batch
        is executed.

        Note that this (currently) only affects the enabled/disabled state of
        the Refresh button within the Tailbone batch view.
        """
        if batch.executed:
            return False
        return True

    def progress_loop(self, *args, **kwargs):
        return progress_loop(*args, **kwargs)

    def setup_refresh(self, batch, progress=None):
        """
        Perform any setup (caching etc.) necessary for refreshing a batch.
        """

    def teardown_refresh(self, batch, progress=None):
        """
        Perform any teardown (cleanup etc.) necessary after refreshing a batch.
        """

    def do_refresh(self, batch, user, progress=None):
        self.refresh(batch, progress=progress)
        return True

    def refresh(self, batch, progress=None):
        """
        Perform a full data refresh for the batch.  What exactly this means will
        depend on the type of batch, and specific handler logic.

        Generally speaking this refresh is meant to use queries etc. to obtain
        "fresh" data for the batch (header) and all its rows.  In most cases
        certain data is expected to be "core" to the batch and/or rows, and
        such data will be left intact, with all *other* data values being
        re-calculated and/or reset etc.
        """
        session = orm.object_session(batch)
        self.setup_refresh(batch, progress=progress)
        if self.repopulate_when_refresh:
            del batch.data_rows[:]
            batch.rowcount = 0
            session.flush()
            self.populate(batch, progress=progress)
        else:
            batch.rowcount = 0

            def refresh(row, i):
                with session.no_autoflush:
                    self.refresh_row(row)
                if not row.removed:
                    batch.rowcount += 1

            self.progress_loop(refresh, batch.active_rows(), progress,
                               message="Refreshing batch data rows")
        self.refresh_batch_status(batch)
        self.teardown_refresh(batch, progress=progress)
        return True

    def refresh_row(self, row):
        """
        This method will be passed a row object which has already been properly
        added to a batch, and which has basic required fields already
        populated.  This method is then responsible for further populating all
        applicable fields for the row, based on current data within the
        relevant system(s).

        Note that in some cases this method may be called multiple times for
        the same row, e.g. once when first creating the batch and then later
        when a user explicitly refreshes the batch.  The method logic must
        account for this possibility.
        """

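    # A rough sketch of what a subclass might do in refresh_row(); the product
    # lookup helper and status values shown are assumptions for illustration,
    # not part of this base class:
    #
    #     def refresh_row(self, row):
    #         product = self.find_product(row)    # hypothetical helper
    #         if not product:
    #             row.status_code = row.STATUS_PRODUCT_NOT_FOUND
    #             return
    #         row.product = product
    #         row.status_code = row.STATUS_OK
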
    def remove_row(self, row):
        """
        Remove the given row from its batch.  This may delete the row outright
        from the database, or simply mark it as removed etc.  Defaults to the
        latter.
        """
        if row.removed:
            raise ValueError("Row is already marked as removed: {}".format(row))
        batch = row.batch
        row.removed = True
        self.refresh_batch_status(batch)
        if batch.rowcount is not None:
            batch.rowcount -= 1

    def refresh_batch_status(self, batch):
        """
        Update the batch status, as needed...
        """

    def mark_complete(self, batch, progress=None):
        """
        Mark the given batch as complete, i.e. set its ``complete`` flag to
        ``True``.
        """
        batch.complete = True

    def mark_incomplete(self, batch, progress=None):
        """
        Mark the given batch as incomplete, i.e. set its ``complete`` flag to
        ``False``.
        """
        batch.complete = False

    def why_not_execute(self, batch):
        """
        This method should return a string indicating the reason why the given
        batch should not be considered executable.  By default it returns
        ``None`` which means the batch *is* to be considered executable.

        Note that it is assumed the batch has not already been executed, since
        execution is globally prevented for such batches.
        """

    def executable(self, batch):
        """
        This method should return a boolean indicating whether or not execution
        should be allowed for the batch, given its current condition.  The
        default returns ``False`` if the batch has already been executed, or if
        :meth:`why_not_execute()` returns a reason; otherwise it returns
        ``True``.  You may override as needed.

        Note that this (currently) only affects the enabled/disabled state of
        the Execute button within the Tailbone batch view.
        """
        if batch is None:
            return True
        if batch.executed:
            return False
        if self.why_not_execute(batch):
            return False
        return True

    def auto_executable(self, batch):
        """
        Must return a boolean indicating whether the given batch is eligible
        for "automatic" execution, i.e. immediately after the batch is created.
        """
        return False

    def do_execute(self, batch, user, progress=None, **kwargs):
        result = self.execute(batch, user=user, progress=progress, **kwargs)
        if not result:
            return False
        batch.executed = make_utc()
        batch.executed_by = user
        return result

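    # A hedged sketch of the typical execution flow as driven by a caller
    # (e.g. a web view or command); the surrounding variables are assumed:
    #
    #     reason = handler.why_not_execute(batch)
    #     if reason:
    #         raise RuntimeError("batch cannot be executed: {}".format(reason))
    #     if handler.do_execute(batch, user):
    #         session.commit()
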
    def execute(self, batch, progress=None, **kwargs):
        """
        Execute the given batch, with given progress and kwargs.  That is an
        intentionally generic statement, the meaning of which must be further
        defined by the handler subclass, since the default merely raises
        ``NotImplementedError``.
        """
        raise NotImplementedError

    def execute_many(self, batches, progress=None, **kwargs):
        """
        Execute a set of batches, with given progress and kwargs.  Default
        behavior is to simply execute each batch in succession.  Any batches
        which are already executed are skipped.
        """
        now = make_utc()
        for batch in batches:
            if not batch.executed:
                self.execute(batch, progress=progress, **kwargs)
                batch.executed = now
                batch.executed_by = kwargs['user']
        return True

    def delete(self, batch, delete_all_data=True, progress=None, **kwargs):
        """
        Delete all data for the batch, including any related (e.g. row)
        records, as well as files on disk etc.  This method should *not* delete
        the batch itself however.

        :param delete_all_data: Flag indicating whether *all* data should be
           deleted.  You should probably set this to ``False`` if in dry-run
           mode, since deleting *all* data often implies deleting files from
           disk, which is not transactional and therefore can't be rolled back.
        """
        if delete_all_data:
            if hasattr(batch, 'delete_data'):
                batch.delete_data(self.config)
        if hasattr(batch, 'data_rows'):
            del batch.data_rows[:]

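    # A small sketch of the dry-run distinction described above (the
    # ``dry_run`` flag is assumed to come from the caller):
    #
    #     handler.delete(batch, delete_all_data=not dry_run)
    #     session.delete(batch)
    #     if dry_run:
    #         session.rollback()
    #     else:
    #         session.commit()
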
    def setup_clone(self, oldbatch, progress=None):
        """
        Perform any setup (caching etc.) necessary for cloning a batch.  Note
        that the ``oldbatch`` arg is the "old" batch, i.e. the one from which a
        clone is to be created.
        """

    def teardown_clone(self, newbatch, progress=None):
        """
        Perform any teardown (cleanup etc.) necessary after cloning a batch.
        Note that the ``newbatch`` arg is the "new" batch, i.e. the one which
        was just created by cloning the old batch.
        """

    def clone(self, oldbatch, created_by, progress=None):
        """
        Clone the given batch as a new batch, and return the new batch.
        """
        self.setup_clone(oldbatch, progress=progress)
        batch_class = self.batch_model_class
        batch_mapper = orm.class_mapper(batch_class)

        newbatch = batch_class()
        newbatch.created_by = created_by
        newbatch.rowcount = 0
        for name in batch_mapper.columns.keys():
            if name not in ('uuid', 'id', 'created', 'created_by_uuid', 'rowcount', 'executed', 'executed_by_uuid'):
                setattr(newbatch, name, getattr(oldbatch, name))

        session = orm.object_session(oldbatch)
        session.add(newbatch)
        session.flush()

        row_class = newbatch.row_class
        row_mapper = orm.class_mapper(row_class)

        def clone_row(oldrow, i):
            newrow = self.clone_row(oldrow)
            self.add_row(newbatch, newrow)

        self.progress_loop(clone_row, oldbatch.data_rows, progress,
                           message="Cloning data rows for new batch")

        self.refresh_batch_status(newbatch)
        self.teardown_clone(newbatch, progress=progress)
        return newbatch

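    # A brief usage sketch, e.g. to re-run a previously-executed batch (the
    # variables shown are assumed, not defined above):
    #
    #     newbatch = handler.clone(oldbatch, created_by=user)
    #     assert not newbatch.executed
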
    def clone_row(self, oldrow):
        row_class = self.batch_model_class.row_class
        row_mapper = orm.class_mapper(row_class)
        newrow = row_class()
        for name in row_mapper.columns.keys():
            if name not in ('uuid', 'batch_uuid', 'sequence'):
                setattr(newrow, name, getattr(oldrow, name))
        return newrow

    def cache_model(self, session, model, **kwargs):
        return cache_model(session, model, **kwargs)


def get_batch_types(config):
    """
    Returns the list of available batch type keys.
    """
    model = config.get_model()

    keys = []
    for name in dir(model):
        if name == 'BatchMixin':
            continue
        obj = getattr(model, name)
        if isinstance(obj, type):
            if issubclass(obj, model.Base):
                if issubclass(obj, model.BatchMixin):
                    keys.append(obj.batch_key)

    keys.sort()
    return keys


def get_batch_handler(config, batch_key, default=None, error=True):
    """
    Returns a batch handler object corresponding to the given batch key.
    """
    spec = config.get('rattail.batch', '{}.handler'.format(batch_key), default=default)
    if error and not spec:
        raise ValueError("handler spec not found for batch type: {}".format(batch_key))
    handler = load_object(spec)(config)
    return handler
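

# A hedged sketch of how handler lookup ties into configuration; the handler
# spec shown is only an example, and real projects would point the option at
# their own handler class:
#
#     [rattail.batch]
#     labels.handler = rattail.batch.labels:LabelBatchHandler
#
#     handler = get_batch_handler(config, 'labels')
#     assert handler.batch_key == 'labels'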