This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: shrinking /usr/lib/locale


[Jakub Jelinek]

> We used similar scripts initially, but after we added all locales in UTF-8
> this took eons, so ATM we're using a program for this (attached below).

Hi, people.  Here is my cut of something similar, in case it is useful. :-)
I call it `link-dups'.  Ignore the `-s', `-a' and `-r' options if you want hard links.

#!/usr/bin/env python
# Replace identical duplicated files with links.
# Copyright © 1997, 1998, 1999, 2000 Progiciels Bourbeau-Pinard inc.
# François Pinard <pinard@iro.umontreal.ca>, janvier 1991.

# All ordinary files under the PATH arguments are checksummed,
# then files having identical sums are compared.  Disk space is
# reclaimed by replacing identical files by links.

"""\
Usage: link-dups [OPTION]... [PATH]...

  -v  Display groups of identical files
  -n  Dry run, do not touch file system
  -s  Best symbolic links instead of hard links
  -a  Absolute symbolic links instead of best
  -r  Relative symbolic links instead of best
"""

import getopt, os, stat, string, sys

class run:
    """Namespace for the run-time settings selected on the command line.

    The class is never instantiated; its attributes are read and written
    directly (`run.dry', `run.verbose', ...) as global flags.
    """
    # Non-zero: only print the `ln' commands, do not touch the file system.
    dry = 0
    # Non-zero: display each group of files sharing a checksum.
    verbose = 0
    # Non-zero: symbolic links always get an absolute target.
    absolute = 0
    # Non-zero: create symbolic links instead of hard links.
    symbolic = 0
    # Non-zero: symbolic links always get a relative target.
    relative = 0
    # Components of the working directory when the class body executes.
    # Uses the string method rather than `string.split', which no longer
    # exists on current interpreters.
    currents = os.getcwd().split('/')

def _shell_quote(argument):
    # Single-quote ARGUMENT for /bin/sh, so that spaces and shell
    # metacharacters in a path reach `find' unchanged.
    return "'%s'" % argument.replace("'", "'\\''")

def _same_contents(first, second, chunk_size=65536):
    # Return 1 if files FIRST and SECOND hold exactly the same bytes,
    # 0 otherwise.  Both are read in binary mode, chunk by chunk, and
    # are always closed before returning.
    one = open(first, 'rb')
    try:
        two = open(second, 'rb')
        try:
            while 1:
                chunk = one.read(chunk_size)
                if chunk != two.read(chunk_size):
                    return 0
                if not chunk:
                    return 1
        finally:
            two.close()
    finally:
        one.close()

def main(*arguments):
    """Decode the command line, then link together identical files.

    ARGUMENTS are the command line words (options and paths).  With no
    argument at all, the usage text is printed and the program exits.

    Files found under the given paths are checksummed by an external
    `find | xargs md5sum | sort' pipeline.  Within each run of equal
    checksums, the first file listed is the reference; every later file
    which is not already the same inode, and whose bytes really match
    the reference, is replaced by a link to it.
    """
    if not arguments:
        sys.stdout.write(__doc__)
        sys.exit(0)
    # Decode call.
    options, arguments = getopt.getopt(list(arguments), 'anrsv')
    for option, value in options:
        if option == '-a':
            run.absolute = 1
        elif option == '-n':
            run.dry = 1
        elif option == '-r':
            run.relative = 1
        elif option == '-s':
            run.symbolic = 1
        elif option == '-v':
            run.verbose = 1
    # An explicit test rather than an `assert', which `python -O' removes.
    if run.absolute and run.relative:
        sys.stderr.write('link-dups: options -a and -r are mutually exclusive\n')
        sys.exit(2)
    if run.absolute or run.relative:
        run.symbolic = 1
    # Collect checksums.  Each path is quoted, as the command goes
    # through the shell.
    command = ('find %s -type f -print0 | xargs -0 md5sum | sort'
               % ' '.join(map(_shell_quote, arguments)))
    pipe = os.popen(command)
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()
    # Establish links.
    current_sum = current_file = None
    current_dev = current_ino = 0
    for line in lines:
        # NOTE(review): splitting on white space loses leading blanks in
        # a file name, and ignores the backslash escaping md5sum applies
        # to unusual names -- such files would make `os.stat' fail.
        digest, name = line[:-1].split(None, 1)
        if name[:2] == './':
            name = name[2:]
        info = os.stat(name)
        links = info[stat.ST_NLINK]
        if digest != current_sum:
            # First file of a new checksum group: it becomes the reference.
            if run.verbose:
                sys.stdout.write('\n')
                sys.stdout.write('# %s %s\n' % (links, name))
            current_sum, current_file = digest, name
            current_dev, current_ino = info[stat.ST_DEV], info[stat.ST_INO]
            continue
        if run.verbose:
            sys.stdout.write('# %s %s\n' % (links, name))
        # Already the very same inode as the reference: nothing to gain.
        if ((info[stat.ST_DEV] == current_dev
             and info[stat.ST_INO] == current_ino)):
            continue
        # Equal checksums are only a hint; confirm byte for byte.
        if not _same_contents(current_file, name):
            continue
        if run.symbolic:
            symlink(current_file, name)
        else:
            hardlink(current_file, name)

def hardlink(origin, destination):
    """Replace file DESTINATION by a hard link to file ORIGIN.

    The equivalent `ln -f' command is always printed.  Nothing else is
    done when `run.dry' is set.

    The link is first created under a temporary name next to
    DESTINATION, then renamed over it.  If linking fails (for example
    across file systems), OSError is raised and DESTINATION is left
    untouched, instead of having been removed beforehand.
    """
    sys.stdout.write('ln -f %s %s\n' % (origin, destination))
    if not run.dry:
        temporary = '%s.link-dups.%d' % (destination, os.getpid())
        os.link(origin, temporary)
        try:
            os.rename(temporary, destination)
        except OSError:
            # Do not leave the temporary link behind.
            os.remove(temporary)
            raise

def symlink(origin, destination):
    """Replace file DESTINATION by a symbolic link to file ORIGIN.

    The link target is computed by `nicest_path', according to the
    `run.absolute' and `run.relative' settings.  The equivalent `ln -sf'
    command is always printed.  Nothing else is done when `run.dry' is
    set.

    The link is first created under a temporary name next to
    DESTINATION, then renamed over it, so DESTINATION is not lost if
    the link cannot be created.
    """
    assert os.path.exists(origin)
    assert not os.path.islink(origin)
    # A relative target is resolved from the directory holding the link,
    # and `nicest_path' emits one `..' per component of its first
    # argument: give it that directory, not the link file itself.
    start = os.path.dirname(os.path.abspath(destination))
    origin = nicest_path(start, origin,
                         absolute=run.absolute, relative=run.relative)
    assert origin
    sys.stdout.write('ln -sf %s %s\n' % (origin, destination))
    if not run.dry:
        temporary = '%s.link-dups.%d' % (destination, os.getpid())
        os.symlink(origin, temporary)
        try:
            os.rename(temporary, destination)
        except OSError:
            # Do not leave the temporary link behind.
            os.remove(temporary)
            raise

def nicest_path(start, goal, absolute=0, relative=0):
    """Return a path leading to GOAL, suitable for use from START.

    START is taken as a directory: a relative result climbs one `..'
    for each component of START beyond the prefix it shares with GOAL.
    Both paths may be absolute, relative to the current directory, or
    start with `~'.

    With ABSOLUTE, the result is always absolute.  With RELATIVE, it is
    always relative to START.  With neither, the relative form is chosen
    only when the shared prefix covers more than half of the components
    of START; otherwise the absolute form is returned.
    """
    def get_parts(path):
        # Components of PATH, made absolute, after splitting on `/'.
        if path[0] == '~':
            return os.path.expanduser(path).split('/')
        if path[0] == '/':
            return path.split('/')
        return os.getcwd().split('/') + path.split('/')
    if not absolute:
        starts = get_parts(start)
        goals = get_parts(goal)
        # Count the leading components common to both paths.
        maximum = min(len(starts), len(goals))
        for counter in range(maximum):
            if starts[counter] != goals[counter]:
                break
        else:
            counter = maximum
        if relative or len(starts) < 2*counter:
            return '/'.join(['..']*(len(starts)-counter) + goals[counter:])
    # Absolute result.  Expand `~' here as well, as `get_parts' does, so
    # that such a goal is not mistaken for a name in the current directory.
    if goal[0] == '~':
        goal = os.path.expanduser(goal)
    if goal[0] != '/':
        return '%s/%s' % (os.getcwd(), goal)
    return goal

# Script entry point: hand the command line words over to `main'.
if __name__ == '__main__':
    # Argument unpacking replaces `apply', which newer interpreters lack.
    main(*sys.argv[1:])

-- 
François Pinard   http://www.iro.umontreal.ca/~pinard

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]