This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
Re: shrinking /usr/lib/locale
[Jakub Jelinek]
> We used similar scripts initially, but after we added all locales in UTF-8
> this took eons, so ATM we're using a program for this (attached below).
Hi, people. Here is my cut of something similar, in case it is useful. :-)
I call it `link-dups'. Ignore the `-s', `-a' and `-r' options if you want hard links.
#!/usr/bin/env python
# Replace identical duplicated files with links.
# Copyright © 1997, 1998, 1999, 2000 Progiciels Bourbeau-Pinard inc.
# François Pinard <pinard@iro.umontreal.ca>, janvier 1991.
# All ordinary files in the current hierarchy are checksummed,
# then files having identical sums are compared. Disk space is
# reclaimed by replacing identical files by links.
"""\
Usage: link-dups [OPTION]... [PATH]...
-v Display groups of identical files
-n Dry run, do not touch file system
-s Best symbolic links instead of hard links
-a Absolute symbolic links instead of best
-r Relative symbolic links instead of best
"""
import getopt, os, stat, string, sys
class run:
    """Namespace holding the settings shared by the whole script.

    The class is not instantiated in this script; its attributes are read
    and assigned directly, as in `run.dry'.
    """
    # -n: print the `ln' commands only, leave the file system untouched.
    dry = 0
    # -v: display every group of files sharing a checksum.
    verbose = 0
    # -a: force absolute symbolic links (implies symbolic).
    absolute = 0
    # -s: make symbolic links instead of hard links.
    symbolic = 0
    # -r: force relative symbolic links (implies symbolic).
    relative = 0
    # Components of the working directory when the class body is executed.
    # The string method replaces `string.split', which is gone in Python 3.
    currents = os.getcwd().split('/')
def _shell_quote(text):
    """Return TEXT quoted so that a POSIX shell reads it back as one word."""
    return "'" + text.replace("'", "'\\''") + "'"


def main(*arguments):
    """Replace identical files found under the given paths by links.

    ARGUMENTS are the command-line words: options among -a, -n, -r, -s and
    -v, followed by the paths to scan.  Without any argument, the usage
    text is printed and the program exits with status 0.  An unknown
    option, or -a combined with -r, is reported on standard error and the
    program exits with status 2.

    The files are listed and checksummed by a `find | xargs md5sum | sort'
    pipeline, so files having the same checksum come out on consecutive
    lines.  The first file of each such group is the origin; every later
    file whose contents really equal the origin is handed to `symlink' or
    `hardlink', depending on the options.
    """
    import filecmp

    if not arguments:
        sys.stdout.write(__doc__ or '')
        sys.exit(0)

    # Decode call.
    try:
        options, paths = getopt.getopt(list(arguments), 'anrsv')
    except getopt.error as diagnostic:
        sys.stderr.write('link-dups: %s\n' % diagnostic)
        sys.exit(2)
    flags = {'-a': 'absolute', '-n': 'dry', '-r': 'relative',
             '-s': 'symbolic', '-v': 'verbose'}
    for option, _ in options:
        setattr(run, flags[option], 1)
    if run.absolute and run.relative:
        sys.stderr.write('link-dups: options -a and -r are exclusive\n')
        sys.exit(2)
    if run.absolute or run.relative:
        run.symbolic = 1

    # List the checksums.  Each path is quoted so that spaces or shell
    # metacharacters in it cannot alter the command, and `-r' keeps xargs
    # from running md5sum on its own standard input when find yields no file.
    command = ('find %s -type f -print0 | xargs -0 -r md5sum | sort'
               % ' '.join([_shell_quote(path) for path in paths]))
    pipe = os.popen(command)
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    # Establish links.
    current_sum = current_file = None
    current_dev = current_ino = 0
    for line in lines:
        digest, name = line[:-1].split(None, 1)
        if name[:2] == './':
            name = name[2:]
        info = os.stat(name)
        links = info[stat.ST_NLINK]
        if digest != current_sum:
            # A new checksum starts a new group; this file is its origin.
            if run.verbose:
                sys.stdout.write('\n# %s %s\n' % (links, name))
            current_sum, current_file = digest, name
            current_dev, current_ino = info[stat.ST_DEV], info[stat.ST_INO]
            continue
        if run.verbose:
            sys.stdout.write('# %s %s\n' % (links, name))
        # Already the very same file as the origin: nothing to reclaim.
        if (info[stat.ST_DEV], info[stat.ST_INO]) == (current_dev, current_ino):
            continue
        # Equal checksums do not prove equal contents, so compare the bytes.
        # Empty files are linked together like any others.
        if not filecmp.cmp(current_file, name, shallow=False):
            continue
        if run.symbolic:
            symlink(current_file, name)
        else:
            hardlink(current_file, name)
def hardlink(origin, destination):
    """Replace DESTINATION by a hard link to ORIGIN.

    The equivalent `ln -f' command is always written on standard output;
    the file system is only modified when `run.dry' is false.  OSError
    from the underlying calls is propagated, and DESTINATION is then left
    as it was.
    """
    sys.stdout.write('ln -f %s %s\n' % (origin, destination))
    if run.dry:
        return
    # Make the link under a scratch name, then rename it over DESTINATION.
    # Removing DESTINATION first would lose the file whenever the link
    # cannot be made, for instance when ORIGIN lives on another device.
    scratch = '%s.link-dups-%d' % (destination, os.getpid())
    os.link(origin, scratch)
    try:
        os.rename(scratch, destination)
    finally:
        # The scratch name survives a failed rename, and POSIX rename also
        # leaves it alone when both names already designate the same file.
        if os.path.lexists(scratch):
            os.remove(scratch)
def symlink(origin, destination):
    """Replace DESTINATION by a symbolic link pointing to ORIGIN.

    ORIGIN must exist and must not itself be a symbolic link.  The text of
    the link is chosen by `nicest_path', honouring `run.absolute' and
    `run.relative'.  The equivalent `ln -sf' command is always written on
    standard output; the file system is only modified when `run.dry' is
    false.  OSError from the underlying calls is propagated, and
    DESTINATION is then left as it was.
    """
    assert os.path.exists(origin)
    assert not os.path.islink(origin)
    # A relative link is resolved from the directory holding the link, so
    # that directory, not the link file itself, is the starting point.
    target = nicest_path(os.path.dirname(os.path.abspath(destination)),
                         os.path.abspath(origin),
                         absolute=run.absolute, relative=run.relative)
    assert target
    sys.stdout.write('ln -sf %s %s\n' % (target, destination))
    if run.dry:
        return
    # Make the link under a scratch name in the same directory, then rename
    # it over DESTINATION, so that a failure never loses DESTINATION.
    scratch = '%s.link-dups-%d' % (destination, os.getpid())
    os.symlink(target, scratch)
    try:
        os.rename(scratch, destination)
    finally:
        # Only a failed rename leaves the scratch link behind.
        if os.path.lexists(scratch):
            os.remove(scratch)
def nicest_path(start, goal, absolute=0, relative=0):
    """Return a path designating GOAL as seen from the directory START.

    Every component of START counts as one directory level to climb, so
    START should name a directory.  Both paths may be absolute, relative
    to the working directory, or begin with `~'.  Empty and `.' components
    are ignored; `..' components are not resolved.

    With ABSOLUTE true, the absolute path of GOAL is returned.  With
    RELATIVE true, a path relative to START is returned, `.' when both
    designate the same place.  With neither, the relative form is chosen
    when START has fewer than twice as many components as the prefix it
    shares with GOAL (the root included), and the absolute form otherwise.
    ABSOLUTE wins when both are true.
    """
    def get_parts(path):
        # Components of PATH made absolute, always led by '' for the root.
        if path[:1] == '~':
            path = os.path.expanduser(path)
        if path[:1] != '/':
            path = os.getcwd() + '/' + path
        return [''] + [part for part in path.split('/')
                       if part not in ('', '.')]

    goals = get_parts(goal)
    if not absolute:
        starts = get_parts(start)
        # Count the leading components common to both paths.
        counter = 0
        for ours, theirs in zip(starts, goals):
            if ours != theirs:
                break
            counter += 1
        if relative or len(starts) < 2 * counter:
            steps = ['..'] * (len(starts) - counter) + goals[counter:]
            return '/'.join(steps) or '.'
    return '/'.join(goals) or '/'
if __name__ == '__main__':
    # Argument unpacking replaces `apply', which no longer exists in
    # Python 3; each command-line word becomes one argument of main.
    main(*sys.argv[1:])
--
François Pinard http://www.iro.umontreal.ca/~pinard