# -*- coding: cp1252 -*- ## #

This module is part of the xlrd package, which is released under a BSD-style licence.

## # 2007-04-22 SJM Remove experimental "trimming" facility. from biffh import * from timemachine import * from struct import unpack from formula import dump_formula, decompile_formula, rangename2d from formatting import nearest_colour_index import time DEBUG = 0 _WINDOW2_options = ( # Attribute names and initial values to use in case # a WINDOW2 record is not written. ("show_formulas", 0), ("show_grid_lines", 1), ("show_sheet_headers", 1), ("panes_are_frozen", 0), ("show_zero_values", 1), ("automatic_grid_line_colour", 1), ("columns_from_right_to_left", 0), ("show_outline_symbols", 1), ("remove_splits_if_pane_freeze_is_removed", 0), ("sheet_selected", 0), # "sheet_visible" appears to be merely a clone of "sheet_selected". # The real thing is the visibility attribute from the BOUNDSHEET record. ("sheet_visible", 0), ("show_in_page_break_preview", 0), ) ## #

Contains the data for one worksheet.

# #

In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a # column index, counting from zero. # Negative values for row/column indexes and slice positions are supported in the expected fashion.

# #

For information about cell types and cell values, refer to the documentation of the Cell class.

# #

WARNING: You don't call this class yourself. You access Sheet objects via the Book object that # was returned when you called xlrd.open_workbook("myfile.xls").

class Sheet(BaseObject): ## # Name of sheet. name = '' ## # Number of rows in sheet. A row index is in range(thesheet.nrows). nrows = 0 ## # Number of columns in sheet. A column index is in range(thesheet.ncols). ncols = 0 ## # The map from a column index to a Colinfo object. Often there is an entry # in COLINFO records for all column indexes in range(257). # Note that xlrd ignores the entry for the non-existent # 257th column. On the other hand, there may be no entry for unused columns. #
-- New in version 0.6.1 colinfo_map = {} ## # The map from a row index to a Rowinfo object. Note that it is possible # to have missing entries -- at least one source of XLS files doesn't # bother writing ROW records. #
-- New in version 0.6.1 rowinfo_map = {} ## # List of address ranges of cells containing column labels. # These are set up in Excel by Insert > Name > Labels > Columns. #
-- New in version 0.6.0 #
How to deconstruct the list: #

    # for crange in thesheet.col_label_ranges:
    #     rlo, rhi, clo, chi = crange
    #     for rx in xrange(rlo, rhi):
    #         for cx in xrange(clo, chi):
    #             print "Column label at (rowx=%d, colx=%d) is %r" \
    #                 (rx, cx, thesheet.cell_value(rx, cx))
    #

col_label_ranges = [] ## # List of address ranges of cells containing row labels. # For more details, see col_label_ranges above. #
-- New in version 0.6.0 row_label_ranges = [] ## # List of address ranges of cells which have been merged. # These are set up in Excel by Format > Cells > Alignment, then ticking # the "Merge cells" box. #
-- New in version 0.6.1 #
How to deconstruct the list: #

    # for crange in thesheet.merged_cells:
    #     rlo, rhi, clo, chi = crange
    #     for rowx in xrange(rlo, rhi):
    #         for colx in xrange(clo, chi):
    #             # cell (rlo, clo) (the top left one) will carry the data
    #             # and formatting info; the remainder will be recorded as
    #             # blank cells, but a renderer will apply the formatting info
    #             # for the top left cell (e.g. border, pattern) to all cells in
    #             # the range.
    #

merged_cells = [] ## # Default column width from DEFCOLWIDTH record, else None. # From the OOo docs:
# """Column width in characters, using the width of the zero character # from default font (first FONT record in the file). Excel adds some # extra space to the default width, depending on the default font and # default font size. The algorithm how to exactly calculate the resulting # column width is not known.
# Example: The default width of 8 set in this record results in a column # width of 8.43 using Arial font with a size of 10 points."""
# For the default hierarchy, refer to the Colinfo class above. #
-- New in version 0.6.1 defcolwidth = None ## # Default column width from STANDARDWIDTH record, else None. # From the OOo docs:
# """Default width of the columns in 1/256 of the width of the zero # character, using default font (first FONT record in the file)."""
# For the default hierarchy, refer to the Colinfo class above. #
-- New in version 0.6.1 standardwidth = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_row_height = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_row_height_mismatch = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_row_hidden = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_additional_space_above = None ## # Default value to be used for a row if there is # no ROW record for that row. # From the optional DEFAULTROWHEIGHT record. default_additional_space_below = None ## # Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden # by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden # only by VBA macro). visibility = 0 ## # A 256-element tuple corresponding to the contents of the GCW record for this sheet. # If no such record, treat as all bits zero. # Applies to BIFF4-7 only. See docs of Colinfo class for discussion. gcw = (0, ) * 256 def __init__(self, book, position, name, number): self.book = book self.biff_version = book.biff_version self._position = position self.logfile = book.logfile self.pickleable = book.pickleable self.dont_use_array = not(array_array and (CAN_PICKLE_ARRAY or not book.pickleable)) self.name = name self.number = number self.verbosity = book.verbosity self.formatting_info = book.formatting_info self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map self.nrows = 0 # actual, including possibly empty cells self.ncols = 0 self._maxdatarowx = -1 # highest rowx containing a non-empty cell self._maxdatacolx = -1 # highest colx containing a non-empty cell self._dimnrows = 0 # as per DIMENSIONS record self._dimncols = 0 self._cell_values = [] self._cell_types = [] self._cell_xf_indexes = [] self._need_fix_ragged_rows = 0 self.defcolwidth = None self.standardwidth = None self.default_row_height = None self.default_row_height_mismatch = 0 self.default_row_hidden = 0 self.default_additional_space_above = 0 self.default_additional_space_below = 0 self.colinfo_map = {} self.rowinfo_map = {} self.col_label_ranges = [] self.row_label_ranges = [] self.merged_cells = [] self._xf_index_stats = [0, 0, 0, 0] self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record for attr, defval in _WINDOW2_options: setattr(self, attr, defval) self.first_visible_rowx = 0 self.first_visible_colx = 0 self.gridline_colour_index = 0x40 self.gridline_colour_rgb = None # pre-BIFF8 self.cached_page_break_preview_mag_factor = 0 self.cached_normal_view_mag_factor = 0 #### Don't initialise this here, use class attribute initialisation. #### self.gcw = (0, ) * 256 #### if self.biff_version >= 80: self.utter_max_rows = 65536 else: self.utter_max_rows = 16384 ## # Cell object in the given row and column. def cell(self, rowx, colx): if self.formatting_info: xfx = self.cell_xf_index(rowx, colx) else: xfx = None return Cell( self._cell_types[rowx][colx], self._cell_values[rowx][colx], xfx, ) ## # Value of the cell in the given row and column. def cell_value(self, rowx, colx): return self._cell_values[rowx][colx] ## # Type of the cell in the given row and column. # Refer to the documentation of the Cell class. def cell_type(self, rowx, colx): return self._cell_types[rowx][colx] ## # XF index of the cell in the given row and column. # This is an index into Book.raw_xf_list and Book.computed_xf_list. #
-- New in version 0.6.1 def cell_xf_index(self, rowx, colx): self.req_fmt_info() xfx = self._cell_xf_indexes[rowx][colx] if xfx > -1: self._xf_index_stats[0] += 1 return xfx # Check for a row xf_index try: xfx = self.rowinfo_map[rowx].xf_index if xfx > -1: self._xf_index_stats[1] += 1 return xfx except KeyError: pass # Check for a column xf_index try: xfx = self.colinfo_map[colx].xf_index assert xfx > -1 self._xf_index_stats[2] += 1 return xfx except KeyError: # If all else fails, 15 is used as hardwired global default xf_index. self._xf_index_stats[3] += 1 return 15 ## # Returns a sequence of the Cell objects in the given row. def row(self, rowx): return [ self.cell(rowx, colx) for colx in xrange(self.ncols) ] ## # Returns a slice of the types # of the cells in the given row. def row_types(self, rowx, start_colx=0, end_colx=None): if end_colx is None: return self._cell_types[rowx][start_colx:] return self._cell_types[rowx][start_colx:end_colx] ## # Returns a slice of the values # of the cells in the given row. def row_values(self, rowx, start_colx=0, end_colx=None): if end_colx is None: return self._cell_values[rowx][start_colx:] return self._cell_values[rowx][start_colx:end_colx] ## # Returns a slice of the Cell objects in the given row. def row_slice(self, rowx, start_colx=0, end_colx=None): nc = self.ncols if start_colx < 0: start_colx += nc if start_colx < 0: start_colx = 0 if end_colx is None or end_colx > nc: end_colx = nc elif end_colx < 0: end_colx += nc return [ self.cell(rowx, colx) for colx in xrange(start_colx, end_colx) ] ## # Returns a slice of the Cell objects in the given column. def col_slice(self, colx, start_rowx=0, end_rowx=None): nr = self.nrows if start_rowx < 0: start_rowx += nr if start_rowx < 0: start_rowx = 0 if end_rowx is None or end_rowx > nr: end_rowx = nr elif end_rowx < 0: end_rowx += nr return [ self.cell(rowx, colx) for rowx in xrange(start_rowx, end_rowx) ] ## # Returns a slice of the values of the cells in the given column. def col_values(self, colx, start_rowx=0, end_rowx=None): nr = self.nrows if start_rowx < 0: start_rowx += nr if start_rowx < 0: start_rowx = 0 if end_rowx is None or end_rowx > nr: end_rowx = nr elif end_rowx < 0: end_rowx += nr return [ self._cell_values[rowx][colx] for rowx in xrange(start_rowx, end_rowx) ] ## # Returns a slice of the types of the cells in the given column. def col_types(self, colx, start_rowx=0, end_rowx=None): nr = self.nrows if start_rowx < 0: start_rowx += nr if start_rowx < 0: start_rowx = 0 if end_rowx is None or end_rowx > nr: end_rowx = nr elif end_rowx < 0: end_rowx += nr return [ self._cell_types[rowx][colx] for rowx in xrange(start_rowx, end_rowx) ] ## # Returns a sequence of the Cell objects in the given column. def col(self, colx): return self.col_slice(colx) # Above two lines just for the docs. Here's the real McCoy: col = col_slice # === Following methods are used in building the worksheet. # === They are not part of the API. def extend_cells(self, nr, nc): # print "extend_cells_2", self.nrows, self.ncols, nr, nc assert 1 <= nc <= 256 assert 1 <= nr <= self.utter_max_rows if nr <= self.nrows: # New cell is in an existing row, so extend that row (if necessary). # Note that nr < self.nrows means that the cell data # is not in ascending row order!! self._need_fix_ragged_rows = 1 nrx = nr - 1 trow = self._cell_types[nrx] tlen = len(trow) nextra = max(nc, self.ncols) - tlen if nextra > 0: xce = XL_CELL_EMPTY if self.dont_use_array: trow.extend([xce] * nextra) if self.formatting_info: self._cell_xf_indexes[nrx].extend([-1] * nextra) else: aa = array_array trow.extend(aa('B', [xce]) * nextra) if self.formatting_info: self._cell_xf_indexes[nrx].extend(aa('h', [-1]) * nextra) self._cell_values[nrx].extend([''] * nextra) if nc > self.ncols: self.ncols = nc self._need_fix_ragged_rows = 1 if nr > self.nrows: scta = self._cell_types.append scva = self._cell_values.append scxa = self._cell_xf_indexes.append fmt_info = self.formatting_info xce = XL_CELL_EMPTY nc = self.ncols if self.dont_use_array: for _unused in xrange(self.nrows, nr): scta([xce] * nc) scva([''] * nc) if fmt_info: scxa([-1] * nc) else: aa = array_array for _unused in xrange(self.nrows, nr): scta(aa('B', [xce]) * nc) scva([''] * nc) if fmt_info: scxa(aa('h', [-1]) * nc) self.nrows = nr def fix_ragged_rows(self): t0 = time.time() ncols = self.ncols xce = XL_CELL_EMPTY aa = array_array s_cell_types = self._cell_types s_cell_values = self._cell_values s_cell_xf_indexes = self._cell_xf_indexes s_dont_use_array = self.dont_use_array s_fmt_info = self.formatting_info totrowlen = 0 for rowx in xrange(self.nrows): trow = s_cell_types[rowx] rlen = len(trow) totrowlen += rlen nextra = ncols - rlen if nextra > 0: s_cell_values[rowx][rlen:] = [''] * nextra if s_dont_use_array: trow[rlen:] = [xce] * nextra if s_fmt_info: s_cell_xf_indexes[rowx][rlen:] = [-1] * nextra else: trow.extend(aa('B', [xce]) * nextra) if s_fmt_info: s_cell_xf_indexes[rowx][rlen:] = aa('h', [-1]) * nextra self._fix_ragged_rows_time = time.time() - t0 if 0 and self.nrows: avgrowlen = float(totrowlen) / self.nrows print >> self.logfile, \ "sheet %d: avg row len %.1f; max row len %d" \ % (self.number, avgrowlen, self.ncols) def tidy_dimensions(self): if self.verbosity >= 3: fprintf(self.logfile, "tidy_dimensions: nrows=%d ncols=%d _need_fix_ragged_rows=%d\n", self.nrows, self.ncols, self._need_fix_ragged_rows, ) if 1 and self.merged_cells: nr = nc = 0 umaxrows = self.utter_max_rows for crange in self.merged_cells: rlo, rhi, clo, chi = crange if not (0 <= rlo < rhi <= umaxrows) \ or not (0 <= clo < chi <= 256): fprintf(self.logfile, "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n", self.number, self.name, crange) if rhi > nr: nr = rhi if chi > nc: nc = chi self.extend_cells(nr, nc) if self.verbosity >= 1 \ and (self.nrows != self._dimnrows or self.ncols != self._dimncols): fprintf(self.logfile, "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n", self.number, self.name, self._dimnrows, self._dimncols, self.nrows, self.ncols, ) if self._need_fix_ragged_rows: self.fix_ragged_rows() def put_cell(self, rowx, colx, ctype, value, xf_index): try: self._cell_types[rowx][colx] = ctype self._cell_values[rowx][colx] = value if self.formatting_info: self._cell_xf_indexes[rowx][colx] = xf_index except IndexError: # print >> self.logfile, "put_cell extending", rowx, colx self.extend_cells(rowx+1, colx+1) try: self._cell_types[rowx][colx] = ctype self._cell_values[rowx][colx] = value if self.formatting_info: self._cell_xf_indexes[rowx][colx] = xf_index except: print >> self.logfile, "put_cell", rowx, colx raise except: print >> self.logfile, "put_cell", rowx, colx raise def put_blank_cell(self, rowx, colx, xf_index): # This is used for cells from BLANK and MULBLANK records ctype = XL_CELL_BLANK value = '' try: self._cell_types[rowx][colx] = ctype self._cell_values[rowx][colx] = value self._cell_xf_indexes[rowx][colx] = xf_index except IndexError: # print >> self.logfile, "put_cell extending", rowx, colx self.extend_cells(rowx+1, colx+1) try: self._cell_types[rowx][colx] = ctype self._cell_values[rowx][colx] = value self._cell_xf_indexes[rowx][colx] = xf_index except: print >> self.logfile, "put_cell", rowx, colx raise except: print >> self.logfile, "put_cell", rowx, colx raise def put_number_cell(self, rowx, colx, value, xf_index): ctype = self._xf_index_to_xl_type_map[xf_index] try: self._cell_types[rowx][colx] = ctype self._cell_values[rowx][colx] = value if self.formatting_info: self._cell_xf_indexes[rowx][colx] = xf_index except IndexError: # print >> self.logfile, "put_number_cell extending", rowx, colx self.extend_cells(rowx+1, colx+1) try: self._cell_types[rowx][colx] = ctype self._cell_values[rowx][colx] = value if self.formatting_info: self._cell_xf_indexes[rowx][colx] = xf_index except: print >> self.logfile, "put_number_cell", rowx, colx raise except: print >> self.logfile, "put_number_cell", rowx, colx raise # === Methods after this line neither know nor care about how cells are stored. def read(self, bk): global rc_stats DEBUG = 0 blah = DEBUG or self.verbosity >= 2 blah_rows = DEBUG or self.verbosity >= 4 blah_formulas = 0 and blah oldpos = bk._position bk.position(self._position) XL_SHRFMLA_ETC_ETC = ( XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2, XL_ARRAY2, XL_TABLEOP_B2, ) self_put_number_cell = self.put_number_cell self_put_cell = self.put_cell self_put_blank_cell = self.put_blank_cell local_unpack = unpack bk_get_record_parts = bk.get_record_parts bv = self.biff_version fmt_info = self.formatting_info eof_found = 0 while 1: # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position rc, data_len, data = bk_get_record_parts() # if rc in rc_stats: # rc_stats[rc] += 1 # else: # rc_stats[rc] = 1 # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data) if rc == XL_NUMBER: rowx, colx, xf_index, d = local_unpack('> self.logfile, \ "*** NOTE: ROW record has row index %d; " \ "should have 0 <= rowx < %d -- record ignored!" \ % (rowx, self.utter_max_rows) continue r = Rowinfo() # Using upkbits() is far too slow on a file # with 30 sheets each with 10K rows :-( # upkbits(r, bits1, ( # ( 0, 0x7FFF, 'height'), # (15, 0x8000, 'has_default_height'), # )) # upkbits(r, bits2, ( # ( 0, 0x00000007, 'outline_level'), # ( 4, 0x00000010, 'outline_group_starts_ends'), # ( 5, 0x00000020, 'hidden'), # ( 6, 0x00000040, 'height_mismatch'), # ( 7, 0x00000080, 'has_default_xf_index'), # (16, 0x0FFF0000, 'xf_index'), # (28, 0x10000000, 'additional_space_above'), # (29, 0x20000000, 'additional_space_below'), # )) # So: r.height = bits1 & 0x7fff r.has_default_height = (bits1 >> 15) & 1 r.outline_level = bits2 & 7 r.outline_group_starts_ends = (bits2 >> 4) & 1 r.hidden = (bits2 >> 5) & 1 r.height_mismatch = (bits2 >> 6) & 1 r.has_default_xf_index = (bits2 >> 7) & 1 r.xf_index = (bits2 >> 16) & 0xfff r.additional_space_above = (bits2 >> 28) & 1 r.additional_space_below = (bits2 >> 29) & 1 if not r.has_default_xf_index: r.xf_index = -1 self.rowinfo_map[rowx] = r if 0 and r.xf_index > -1: fprintf(self.logfile, "**ROW %d %d %d\n", self.number, rowx, r.xf_index) if blah_rows: print >> self.logfile, 'ROW', rowx, bits1, bits2 r.dump(self.logfile, header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) elif rc & 0xff == XL_FORMULA: # 06, 0206, 0406 # DEBUG = 1 # if DEBUG: print "FORMULA: rc: 0x%04x data: %r" % (rc, data) rowx, colx, xf_index, flags = local_unpack('> self.logfile, \ "*** NOTE: COLINFO record has first col index %d, last %d; " \ "should have 0 <= first <= last <= 255 -- record ignored!" \ % (first_colx, last_colx) del c continue upkbits(c, flags, ( ( 0, 0x0001, 'hidden'), ( 1, 0x0002, 'bit1_flag'), # *ALL* colinfos created by Excel in "default" cases are 0x0002!! # Maybe it's "locked" by analogy with XFProtection data. ( 8, 0x0700, 'outline_level'), (12, 0x1000, 'collapsed'), )) for colx in xrange(first_colx, last_colx+1): if colx > 255: break # Excel does 0 to 256 inclusive self.colinfo_map[colx] = c if 0: fprintf(self.logfile, "**COL %d %d %d\n", self.number, colx, c.xf_index) if blah: fprintf( self.logfile, "COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n", self.number, first_colx, last_colx, c.width, c.xf_index, flags, ) c.dump(self.logfile, header='===') elif rc == XL_DEFCOLWIDTH: self.defcolwidth, = local_unpack("> self.logfile, 'DEFCOLWIDTH', self.defcolwidth elif rc == XL_STANDARDWIDTH: if data_len != 2: print >> self.logfile, '*** ERROR *** STANDARDWIDTH', data_len, repr(data) self.standardwidth, = local_unpack("> self.logfile, 'STANDARDWIDTH', self.standardwidth elif rc == XL_GCW: if not fmt_info: continue # useless w/o COLINFO assert data_len == 34 assert data[0:2] == "\x20\x00" iguff = unpack("<8i", data[2:34]) gcw = [] for bits in iguff: for j in xrange(32): gcw.append(bits & 1) bits >>= 1 self.gcw = tuple(gcw) if 0: showgcw = "".join(map(lambda x: "F "[x], gcw)).rstrip().replace(' ', '.') print "GCW:", showgcw elif rc == XL_BLANK: if not fmt_info: continue rowx, colx, xf_index = local_unpack('> self.logfile, "BLANK", rowx, colx, xf_index self_put_blank_cell(rowx, colx, xf_index) elif rc == XL_MULBLANK: # 00BE if not fmt_info: continue mul_row, mul_first = local_unpack('> self.logfile, "MULBLANK", mul_row, mul_first, mul_last pos = 4 for colx in xrange(mul_first, mul_last+1): xf_index, = local_unpack('> self.logfile, "SHEET.READ: EOF" eof_found = 1 break elif rc == XL_OBJ: bk.handle_obj(data) elif rc in bofcodes: ##### EMBEDDED BOF ##### version, boftype = local_unpack('> self.logfile, \ "*** Unexpected embedded BOF (0x%04x) at offset %d: version=0x%04x type=0x%04x" \ % (rc, bk._position - data_len - 4, version, boftype) while 1: code, data_len, data = bk.get_record_parts() if code == XL_EOF: break if DEBUG: print >> self.logfile, "---> found EOF" elif rc == XL_COUNTRY: bk.handle_country(data) elif rc == XL_LABELRANGES: pos = 0 pos = unpack_cell_range_address_list_update_pos( self.row_label_ranges, data, pos, bv, addr_size=8, ) pos = unpack_cell_range_address_list_update_pos( self.col_label_ranges, data, pos, bv, addr_size=8, ) assert pos == data_len elif rc == XL_ARRAY: row1x, rownx, col1x, colnx, array_flags, tokslen = \ local_unpack("= 80 num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \ unpack("<6H", data[0:12]) if self.verbosity >= 1: fprintf(self.logfile, "\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n" \ "*** in Sheet %d (%r).\n" \ "*** %d CF record(s); needs_recalc_or_redraw = %d\n" \ "*** Bounding box is %s\n", self.number, self.name, num_CFs, needs_recalc, rangename2d(browx1, browx2+1, bcolx1, bcolx2+1), ) olist = [] # updated by the function pos = unpack_cell_range_address_list_update_pos( olist, data, 12, bv, addr_size=8) # print >> self.logfile, repr(result), len(result) if self.verbosity >= 1: fprintf(self.logfile, "*** %d individual range(s):\n" \ "*** %s\n", len(olist), ", ".join([rangename2d(*coords) for coords in olist]), ) elif rc == XL_CF: if not fmt_info: continue cf_type, cmp_op, sz1, sz2, flags = unpack("> 26) & 1 bord_block = (flags >> 28) & 1 patt_block = (flags >> 29) & 1 if self.verbosity >= 1: fprintf(self.logfile, "\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n" \ "*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n" \ "*** optional data blocks: font=%d, border=%d, pattern=%d\n", cf_type, cmp_op, sz1, sz2, flags, font_block, bord_block, patt_block, ) # hex_char_dump(data, 0, data_len) pos = 12 if font_block: (font_height, font_options, weight, escapement, underline, font_colour_index, two_bits, font_esc, font_underl) = \ unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118]) font_style = (two_bits > 1) & 1 posture = (font_options > 1) & 1 font_canc = (two_bits > 7) & 1 cancellation = (font_options > 7) & 1 if self.verbosity >= 1: fprintf(self.logfile, "*** Font info: height=%d, weight=%d, escapement=%d,\n" \ "*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n" \ "*** style=%d, posture=%d, canc=%d, cancellation=%d\n", font_height, weight, escapement, underline, font_colour_index, font_esc, font_underl, font_style, posture, font_canc, cancellation, ) pos += 118 if bord_block: pos += 8 if patt_block: pos += 4 fmla1 = data[pos:pos+sz1] pos += sz1 if blah and sz1: fprintf(self.logfile, "*** formula 1:\n", ) dump_formula(bk, fmla1, sz1, bv, reldelta=0, blah=1) fmla2 = data[pos:pos+sz2] pos += sz2 assert pos == data_len if blah and sz2: fprintf(self.logfile, "*** formula 2:\n", ) dump_formula(bk, fmla2, sz2, bv, reldelta=0, blah=1) elif rc == XL_DEFAULTROWHEIGHT: if data_len == 4: bits, self.default_row_height = unpack("> 1) & 1 self.default_additional_space_above = (bits >> 2) & 1 self.default_additional_space_below = (bits >> 3) & 1 elif rc == XL_MERGEDCELLS: if not fmt_info: continue pos = unpack_cell_range_address_list_update_pos( self.merged_cells, data, 0, bv, addr_size=8) if blah: fprintf(self.logfile, "MERGEDCELLS: %d ranges\n", int_floor_div(pos - 2, 8)) assert pos == data_len, \ "MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len) elif rc == XL_WINDOW2: if bv >= 80: (options, self.first_visible_rowx, self.first_visible_colx, self.gridline_colour_index, self.cached_page_break_preview_mag_factor, self.cached_normal_view_mag_factor ) = unpack(">= 1 # print "WINDOW2: visible=%d selected=%d" \ # % (self.sheet_visible, self.sheet_selected) #### all of the following are for BIFF <= 4W elif bv <= 45: if rc == XL_FORMAT or rc == XL_FORMAT2: bk.handle_format(data) elif rc == XL_FONT or rc == XL_FONT_B3B4: bk.handle_font(data) elif rc == XL_STYLE: bk.handle_style(data) elif rc == XL_PALETTE: bk.handle_palette(data) elif rc == XL_BUILTINFMTCOUNT: bk.handle_builtinfmtcount(data) elif rc == XL_XF4 or rc == XL_XF3: #### N.B. not XL_XF bk.handle_xf(data) elif rc == XL_DATEMODE: bk.handle_datemode(data) elif rc == XL_CODEPAGE: bk.handle_codepage(data) elif rc == XL_FILEPASS: bk.handle_filepass(data) elif rc == XL_WRITEACCESS: bk.handle_writeaccess(data) else: # if DEBUG: print "SHEET.READ: Unhandled record type %02x %d bytes %r" % (rc, data_len, data) pass if not eof_found: raise XLRDError("Sheet %d (%r) missing EOF record" \ % (self.number, self.name)) self.tidy_dimensions() bk.position(oldpos) return 1 def req_fmt_info(self): if not self.formatting_info: raise XLRDError("Feature requires open_workbook(..., formatting_info=True)") ## # Determine column display width. #
-- New in version 0.6.1 #
# @param colx Index of the queried column, range 0 to 255. # Note that it is possible to find out the width that will be used to display # columns with no cell information e.g. column IV (colx=255). # @return The column width that will be used for displaying # the given column by Excel, in units of 1/256th of the width of a # standard character (the digit zero in the first font). def computed_column_width(self, colx): self.req_fmt_info() if self.biff_version >= 80: colinfo = self.colinfo_map.get(colx, None) if colinfo is not None: return colinfo.width if self.standardwidth is not None: return self.standardwidth elif self.biff_version >= 40: if self.gcw[colx]: if self.standardwidth is not None: return self.standardwidth else: colinfo = self.colinfo_map.get(colx, None) if colinfo is not None: return colinfo.width elif self.biff_version == 30: colinfo = self.colinfo_map.get(colx, None) if colinfo is not None: return colinfo.width # All roads lead to Rome and the DEFCOLWIDTH ... if self.defcolwidth is not None: return self.defcolwidth * 256 return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record # === helpers === def unpack_RK(rk_str): flags = ord(rk_str[0]) if flags & 2: # There's a SIGNED 30-bit integer in there! i, = unpack('>= 2 # div by 4 to drop the 2 flag bits if flags & 1: return i / 100.0 return float(i) else: # It's the most significant 30 bits of an IEEE 754 64-bit FP number d, = unpack('Contains the data for one cell.

# #

WARNING: You don't call this class yourself. You access Cell objects # via methods of the Sheet object(s) that you found in the Book object that # was returned when you called xlrd.open_workbook("myfile.xls").

Cell objects have three attributes: ctype is an int, value # (which depends on ctype) and xf_index. # If "formatting_info" is not enabled when the workbook is opened, xf_index will be None. # The following table describes the types of cells and how their values # are represented in Python.

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

Type symbol	Type number	Python value
XL_CELL_EMPTY	0	empty string u''
XL_CELL_TEXT	1	a Unicode string
XL_CELL_NUMBER	2	float
XL_CELL_DATE	3	float
XL_CELL_BOOLEAN	4	int; 1 means TRUE, 0 means FALSE
XL_CELL_ERROR	5	int representing internal Excel codes; for a text representation, # refer to the supplied dictionary error_text_from_code
XL_CELL_BLANK	6	empty string u''. Note: this type will appear only when # open_workbook(..., formatting_info=True) is used.

class Cell(BaseObject): __slots__ = ['ctype', 'value', 'xf_index'] def __init__(self, ctype, value, xf_index=None): self.ctype = ctype self.value = value self.xf_index = xf_index def __repr__(self): if self.xf_index is None: return "%s:%r" % (ctype_text[self.ctype], self.value) else: return "%s:%r (XF:%r)" % (ctype_text[self.ctype], self.value, self.xf_index) ## # There is one and only one instance of an empty cell -- it's a singleton. This is it. # You may use a test like "acell is empty_cell". empty_cell = Cell(XL_CELL_EMPTY, '') ##### =============== Colinfo and Rowinfo ============================== ##### ## # Width and default formatting information that applies to one or # more columns in a sheet. Derived from COLINFO records. # #

Here is the default hierarchy for width, according to the OOo docs: # #
"""In BIFF3, if a COLINFO record is missing for a column, # the width specified in the record DEFCOLWIDTH is used instead. # #
In BIFF4-BIFF7, the width set in this [COLINFO] record is only used, # if the corresponding bit for this column is cleared in the GCW # record, otherwise the column width set in the DEFCOLWIDTH record # is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]). # #
In BIFF8, if a COLINFO record is missing for a column, # the width specified in the record STANDARDWIDTH is used. # If this [STANDARDWIDTH] record is also missing, # the column width of the record DEFCOLWIDTH is used instead.""" #
# # Footnote: The docs on the GCW record say this: # """
# If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH # record. If a bit is cleared, the corresponding column uses the width set in the # COLINFO record for this column. #
If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if # the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH # record of the worksheet will be used instead. #
"""
# At the moment (2007-01-17) xlrd is going with the GCW version of the story. # Reference to the source may be useful: see the computed_column_width(colx) method # of the Sheet class. #
-- New in version 0.6.1 #

class Colinfo(BaseObject): ## # Width of the column in 1/256 of the width of the zero character, # using default font (first FONT record in the file). width = 0 ## # XF index to be used for formatting empty cells. xf_index = -1 ## # 1 = column is hidden hidden = 0 ## # Value of a 1-bit flag whose purpose is unknown # but is often seen set to 1 bit1_flag = 0 ## # Outline level of the column, in range(7). # (0 = no outline) outline_level = 0 ## # 1 = column is collapsed collapsed = 0 ## # Height and default formatting information that applies to a row in a sheet. # Derived from ROW records. #
-- New in version 0.6.1 class Rowinfo(BaseObject): ## # Height of the row, in twips. One twip == 1/20 of a point height = 0 ## # 0 = Row has custom height; 1 = Row has default height has_default_height = 0 ## # Outline level of the row outline_level = 0 ## # 1 = Outline group starts or ends here (depending on where the # outline buttons are located, see WSBOOL record [TODO ??]), # and is collapsed outline_group_starts_ends = 0 ## # 1 = Row is hidden (manually, or by a filter or outline group) hidden = 0 ## # 1 = Row height and default font height do not match height_mismatch = 0 ## # 1 = the xf_index attribute is usable; 0 = ignore it has_default_xf_index = 0 ## # Index to default XF record for empty cells in this row. # Don't use this if has_default_xf_index == 0. xf_index = -9999 ## # This flag is set, if the upper border of at least one cell in this row # or if the lower border of at least one cell in the row above is # formatted with a thick line style. Thin and medium line styles are not # taken into account. additional_space_above = 0 ## # This flag is set, if the lower border of at least one cell in this row # or if the upper border of at least one cell in the row below is # formatted with a medium or thick line style. Thin line styles are not # taken into account. additional_space_below = 0