Debianで現時点のパッケージ状態に戻す情報を取得する

Debian(Ubuntu)を再インストールしたときに、現状のaptのパッケージ状態まで戻す情報を収集するPythonスクリプトを書いてみました。

このプログラムは、apt-getなどが出力する/var/log/apt/history.log(logrotateでアーカイブされたログも含む)を利用して、ログ取得が開始された時点以降にインストールされたパッケージと削除されたパッケージの情報を収集し、表示します。

データのユニーク化にsetを使っているところがキモ。ここを参考にさせていただきました。

ライセンス : zlib/libpng License

# coding: utf-8
# vim: expandtab ts=2 sts=2 sw=2

# Copyright (c) 2011 yukke.org
# 
# This software is provided 'as-is', without any express or implied
# warranty. In no event will the authors be held liable for any damages
# arising from the use of this software.
# 
# Permission is granted to anyone to use this software for any purpose,
# including commercial applications, and to alter it and redistribute it
# freely, subject to the following restrictions:
# 
#  1. The origin of this software must not be misrepresented; you must not
#     claim that you wrote the original software. If you use this software
#     in a product, an acknowledgment in the product documentation would be
#     appreciated but is not required.
# 
#  2. Altered source versions must be plainly marked as such, and must not be
#     misrepresented as being the original software.
# 
#  3. This notice may not be removed or altered from any source
#     distribution.

import sys
import os
import gzip
import re

class AptLogAnalizer(object):
  """Summarise the net package installs/removals found in apt history logs.

  Every file in ``logs_path`` whose name starts with ``history`` is read
  (files ending in ``.gz`` are decompressed on the fly).  Only the
  ``Install:``, ``Remove:`` and ``Purge:`` lines are considered; ``Purge``
  is treated the same as ``Remove``.

  After construction:

  * ``install_apps`` -- sorted names of packages whose first *and* most
    recent logged action is an install, i.e. packages that have to be
    installed again to get back to the current state.
  * ``remove_apps`` -- sorted names of packages whose first *and* most
    recent logged action is a removal, i.e. packages that were already
    present when logging started and are gone now.

  Packages whose first and last actions differ cancel out and appear in
  neither list.
  """

  RE_TARGET_PARSE_WORD = re.compile(r'^(Install|Remove|Purge):')
  # Strips the optional ":<arch>" qualifier together with the parenthesised
  # version info, e.g. ":amd64 (1:2.3-1, automatic)" or just " (1:2.3-1)".
  # The qualifier is optional so that an epoch colon inside the version of
  # an unqualified package is never mistaken for the architecture separator.
  RE_REMOVE_VERSION = re.compile(r'(?::\S+)? \(.*?\)')
  # Numeric rotation suffix such as "history.log.3" or "history.log.3.gz".
  RE_ROTATION_INDEX = re.compile(r'\.(\d+)(?:\.gz)?$')

  def __init__(self, logs_path='/var/log/apt'):
    """Read and summarise every history log found in ``logs_path``.

    Raises whatever ``os.listdir`` / ``open`` / ``gzip.open`` raise when the
    directory or one of the log files cannot be read.
    """
    self.install_apps = []
    self.remove_apps = []
    self._logs_path = logs_path
    # name -> [first action was an install, last action was an install]
    self._history = {}
    self._load_log_files()

  def _chronological_key(self, filename):
    """Return a sort key that orders log files from oldest to newest.

    Assumes the usual numeric rotation scheme in which a larger suffix
    means an older file and the unsuffixed ``*.log`` file is the live one.
    Names following any other scheme are placed between the two groups in
    plain name order.
    """
    rotated = self.RE_ROTATION_INDEX.search(filename)
    if rotated:
      return (0, -int(rotated.group(1)), filename)
    if filename.endswith('.log'):
      return (2, 0, filename)
    return (1, 0, filename)

  def _load_log_files(self):
    """Parse all history logs oldest-first, then build the result lists."""
    filenames = [f for f in os.listdir(self._logs_path)
                 if f.startswith('history')]
    # os.listdir() returns names in arbitrary order, but the net effect of
    # install/remove sequences depends on replaying them chronologically.
    filenames.sort(key=self._chronological_key)
    for filename in filenames:
      realfilename = os.path.join(self._logs_path, filename)
      opener = gzip.open if filename.endswith('.gz') else open
      # "with" guarantees the handle is closed even if parsing fails.
      with opener(realfilename, 'rb') as fh:
        self._parse(fh)
    self._clean()

  def _parse(self, fh):
    """Record the install/remove actions found in the open log ``fh``.

    ``fh`` may yield either bytes or text lines; bytes are decoded as UTF-8
    (undecodable bytes are replaced) so the text patterns can be applied.
    """
    for line in fh:
      if isinstance(line, bytes):
        line = line.decode('utf-8', 'replace')
      matches = self.RE_TARGET_PARSE_WORD.search(line)
      if not matches:
        continue
      installing = matches.group(1) == 'Install'
      # The pattern is anchored at the line start, so everything after the
      # match is the comma separated package list.
      value = self.RE_REMOVE_VERSION.sub('', line[matches.end():].strip())
      for name in value.split(','):
        name = name.strip()
        if not name:
          continue
        record = self._history.setdefault(name, [installing, installing])
        record[1] = installing

  def _clean(self):
    """Turn the recorded per-package history into the two sorted lists."""
    installed = []
    removed = []
    for name, (first_installing, last_installing) in self._history.items():
      if first_installing and last_installing:
        installed.append(name)
      elif not first_installing and not last_installing:
        removed.append(name)
      # Otherwise the package ended up in the state it started in.
    self.install_apps = sorted(installed)
    self.remove_apps = sorted(removed)

  def show_installed_apps(self):
    """Print the names in ``install_apps``, one per line."""
    print("\n".join(self.install_apps))

  def show_removed_apps(self):
    """Print the names in ``remove_apps``, one per line."""
    print("\n".join(self.remove_apps))


def main():
  """Analyse the default apt log directory and print both package lists.

  The installed-package list is printed first, followed by the
  removed-package list.
  """
  analyzer = AptLogAnalizer()
  analyzer.show_installed_apps()
  analyzer.show_removed_apps()

# Run the report only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == '__main__':
  main()