Maintenance and Porting of Software by Design Recovery, Guillermo Arango and Ira Baxter and Peter Freeman
@InProceedings{ arango.baxter.ea:maintenance,
  author    = {Guillermo Arango and Ira Baxter and Peter Freeman},
  title     = {Maintenance and Porting of Software by Design Recovery},
  booktitle = {CSM'85: Proceedings of the 1985 Conference on Software
               Maintenance, \textrm{(Washington, DC; November 11--13, 1985)}},
  month     = nov,
  year      = {1985},
  pages     = {42--49},
  abstract  = {DRACO paper on porting through transformation from source
               code to abstraction back to new code. Captures
               domain-specific knowledge.},
  class     = {Reengineering_in_General, Experiences, Alteration,
               Re-Code, Program_Transformations,
               Software_Reverse_Engineering, Reverse_Design,
               Knowledge-Based_Concept_Assignment,
               Human_Oriented_Concept_Assignment_by_Informal_Reasoning},
  keywords  = {domain modeling, domain analysis, DRACO}
}
TMM: Software Maintenance by Transformation, Guillermo Arango and Ira Baxter and Peter Freeman and Christopher Pidgeon
@Article{ arango.baxter.ea:tmm*1,
  author   = {Guillermo Arango and Ira Baxter and Peter Freeman and
              Christopher Pidgeon},
  title    = {{TMM}: Software Maintenance by Transformation},
  journal  = {IEEE Software},
  month    = may,
  year     = {1986},
  volume   = {3},
  number   = {3},
  pages    = {27--39},
  abstract = {Another DRACO-based paper. Uses least common
              abstractions.},
  keywords = {domain modeling, domain analysis, DRACO},
  class    = {Reengineering_in_General, Experiences, Alteration,
              Re-Code, Program_Transformations,
              Software_Reverse_Engineering, Reverse_Design,
              Knowledge-Based_Concept_Assignment,
              Human_Oriented_Concept_Assignment_by_Informal_Reasoning}
}
The concept assignment problem in program understanding, Ted J. Biggerstaff and B. G. Mitbander and D. Webster
@InProceedings{ biggerstaff.mitbander.ea:concept,
  author    = {Ted J. Biggerstaff and B. G. Mitbander and D. Webster},
  title     = {The Concept Assignment Problem in Program Understanding},
  pages     = {482--498},
  booktitle = {Proceedings of the 15th International Conference on
               Software Engineering},
  year      = {1993},
  publisher = {IEEE Computer Society Press},
  month     = apr,
  abstract  = {A person understands a program because they are able to
    relate the structures of the program and its environment to
    their human oriented conceptual knowledge about the world.
    The problem of discovering individual human oriented
    concepts and assigning them to their implementation
    oriented counterparts for a given program is the concept
    assignment problem. The authors will argue that the
    solution to this problem requires methods that have a
    strong plausible reasoning component. We will illustrate
    these ideas through example scenarios using an existing
    design recovery system called DESIRE. Finally, the authors
    will evaluate DESIRE based on its usage on real-world
    problems over the years.},
  keywords  = {reverse engineering, slicing, knowledge base, domain,
               connectionist, concept recognition, plausible reasoning},
  contents  = {To understand an unknown program one must create or
    reconstruct the informal, human oriented expression of
    computational intent through a process of analysis,
    experimentation, guessing and crossword puzzle-like
    assembly. As the informal concepts are discovered and
    interrelated concept by concept, they are simultaneously
    associated with or assigned to the specific implementation
    structures within the program that are the concrete
    instances of those concepts. One operational model for the
    concept recognition and understanding process is to view it
    as a parsing process. The recognizer program uses a finite
    set of pattern templates that recognize the concept
    signatures by a parsing process, where the simplest, most
    elemental concepts are recognized first and then these
    concepts become features of larger-grained, composite
    concepts. For parsing technologies to be effective, they
    rely heavily upon the premise that the concepts to be
    recognized are completely and (mostly) unambiguously
    determined by the formal, structural features of the entity
    being parsed and these features are contextually quite
    local. Another model for the concept assignment problem
    assumes that the formal, structural features play a lesser
    role in the recognition of concepts that are important for
    human understanding and further, that the patterns defining
    these important concepts are far more open to variation and
    ambiguity than can be naturally accommodated by parsing
    technology. The hypothesis of this paper is that a
    parsing-oriented recognition model based on formal,
    predominately structural patterns of programming language
    features is necessary but insufficient for the general
    concept assignment problem mainly because the signatures of
    most human oriented concepts are not constrained in ways
    that are convenient for parsing technologies.
    The properties of concept types are:
    Property         | Programming Concepts      | Human Concepts
    --------------------------------------------------------------------
    Domain           | Numerical computation and | Arbitrary domain
    characterization | data manipulation         | concepts
    --------------------------------------------------------------------
    Feature types    | Formal elements           | Formal and informal
                     | - Language syntax and     | - Natural language
                     |   semantics               |   tokens
                     | - Data flow               | - proximity and
                     | - Control flow            |   grouping
                     | - Deducible properties    | - design conventions
                     |                           | - domain conventions
                     |                           | - previous solution
                     |                           |   states
                     |                           | - weight of evidence
    --------------------------------------------------------------------
    Reasoning method | Deductive or algorithmic  | Plausible or fuzzy
                     |                           | reasoning
    --------------------------------------------------------------------
    Uniqueness of    | Unique or canonical       | Multiple equivalent
    solution         |                           | solutions
    --------------------------------------------------------------------
    Precision        | Precise                   | Approximate
    Four scenarios for automated assistance by concept
    assignment tools are presented. The tools can be classified
    into naive (call graph viewer, slicer, cluster analysis
    tool/module view, prolog query system) and intelligent
    tools (DM-TAO).
    1) Suggestive Data Names as First Clue 2) Suggestive
    Function Names as First Clue 3) Patterns of Relationships
    as First Clue 4) Intelligent Agent Provides First Clue
    DM-TAO (using a domain model) answers the following
    questions: 1) Conceptual Highlights: Look for any concepts
    that correspond to some concept in your domain model 2)
    Conceptual Grep: Look for instances of a user-specified
    concept 3) What's this?: Propose a concept assignment for
    the currently selected code
    DM-TAO uses a domain model to drive a connectionist-based
    inference engine. The domain model is built as a network in
    which each concept is represented as a node and the
    relationships between nodes are represented as explicit
    links. The information associated with each concept
    includes: the typical features that characterize the
    concept, its relationships to other concepts in the domain,
    relevant informal knowledge, the syntactic and/or
    conceptual context this concept is likely to occur in, etc.
    The domain model also captures the underlying semantics in
    the target domain through a rich set of interconcept
    relations embodying the nature and degree of the semantic
    associations between the domain concepts. To facilitate
    inferencing, this domain information is represented as a
    semantic/connectionist hybrid network. The concepts and
    their features are represented by nodes, which are of
    different types: concept node, feature node, term node,
    syntax node, etc., depending on the information being
    represented. The nodes are grouped together into layers.
    The feature, term and syntax nodes form the input layer of
    the network, while the concept nodes are loosely organized
    at different levels of abstractions, generally reflecting
    the conceptual infrastructure of the domain model. The
    different inter-concept relationships present in the domain
    model are represented by corresponding inter-node link
    types. Every link in the system has a real-valued weight
    associated with it, quantifying the strength of the
    relationship between the two nodes connected by it.
    The nodes serve as the processing units of the network and
    generate appropriate signal strengths or activation levels
    as a nonlinear function of the input. For most nodes
    (except those in the input layer), the input is a function
    of the activations generated by the nodes in the previous
    layer that they are connected to, modulated by the weight
    on the connecting link. Nodes in the input layer are
    directly driven by the actions of a feature-extractor which
    scans the target code for relevant features - such as
    syntactic clues, lexical terms which might embody a
    concept-reference, clustering clues, etc. Their activation
    level is a function of the number of corresponding clues
    found in the current target code segment, the degree of the
    match, and the activation history of related feature nodes.
    The signals generated in the input layer are propagated
    throughout the network via a controlled spreading
    activation process, which continues until the concept nodes
    compute their activation levels. If the computed output of
    a concept node is higher than a certain value - called the
    recognition threshold, then the domain concept represented
    by that concept node is predicted to be present in the
    corresponding section of code from which the relevant clues
    were extracted.
    The accuracy of prediction of the network is a function of
    the weights distributed on its links. The system adapts
    its response via a 'training' process, which modulates
    these weights according to certain rules to obtain an
    optimal distribution. In DM-TAO, the training is effected
    in two stages: (1) The network is initially primed with a
    priori knowledge from the domain model regarding the degree
    of the association between two connected concepts (a
    qualitative assessment of low, medium or high provided by
    the domain builder). (2) The network weights are adjusted
    in a performance driven manner using qualitative relevance
    feedback from the user regarding the validity of the
    tentative concept assignments made by the system.
    The concept recognition tool DESIRE is evaluated. Strengths
    and weaknesses are described. A relation to commercial
    products and other research is given.},
  note      = {The problem of discovering abstract human oriented
    concepts and relating them to their implementation oriented
    counterparts is called the \textrm{concept assignment
    problem}. Describes various heuristic clues, as supported
    by the DESIRE system, that can be used for concept
    extraction.},
  class     = {Software_Reverse_Engineering, Reverse_Design,
               Knowledge-Based_Concept_Assignment,
               Human_Oriented_Concept_Assignment_by_Informal_Reasoning}
}
Program Understanding and the Concept Assignment Problem, Ted J. Biggerstaff and Bharat G. Mitbander and Dallas Webster
@Article{ biggerstaff.mitbander.ea:program,
  author  = {Ted J. Biggerstaff and Bharat G. Mitbander and Dallas
             Webster},
  title   = {Program Understanding and the Concept Assignment Problem},
  journal = {Communications of the ACM},
  volume  = {37},
  number  = {5},
  month   = may,
  year    = {1994},
  pages   = {72--83},
  class   = {Software_Reverse_Engineering, Reverse_Design,
             Knowledge-Based_Concept_Assignment,
             Human_Oriented_Concept_Assignment_by_Informal_Reasoning}
}
Human-oriented Conceptual Abstractions in the Reengineering of Software, Ted J. Biggerstaff
@InProceedings{ biggerstaff:human-oriented,
  author    = {Biggerstaff, Ted J.},
  title     = {Human-oriented Conceptual Abstractions in the Reengineering of Software},
  booktitle = {Proceedings of the 12th International Conference on Software Engineering},
  year      = 1990,
  month     = mar,
  pages     = {120},
  abstract  = {Semiformal, human-oriented, and domain-specific abstractions
               play a critical role in both reverse and forward engineering,
               and therefore, in reengineering. Such conceptual abstractions
               are fundamental to the reengineering process whether it is a
               totally manual or partially automated process.},
  class     = {Software_Reverse_Engineering,
               Reverse_Design,
               Knowledge-Based_Concept_Assignment,
               Human_Oriented_Concept_Assignment_by_Informal_Reasoning}
}
Clarity Guided Belief Revision for Domain Knowledge Recovery in Legacy Systems, Yang Li and Hongji Yang and William Chu
Available as .
@InProceedings{ li.yang.ea:clarity,
  author    = {Yang Li and Hongji Yang and William Chu},
  title     = {Clarity Guided Belief Revision for Domain Knowledge
               Recovery in Legacy Systems},
  booktitle = {Proceedings of the 12th International Conference on
               Software Engineering and Knowledge Engineering (SEKE2000)},
  publisher = {Knowledge System Institute},
  year      = {2000},
  editor    = {Daniel E. Cooke and Joseph E. Urban},
  pages     = {248--255},
  address   = {Chicago, USA},
  month     = jun,
  abstract  = {Program understanding is the process of acquiring
    knowledge from a computer program. Although research work
    utilising knowledge engineering techniques has been
    undertaken in this field, it is our observation that a
    thorough application of AI methodology has not been
    sufficiently explored. In this paper, we present a clarity
    guided belief revision approach to domain knowledge
    recovery in legacy software systems. Novel solutions are
    given to three key AI issues in the context of domain
    knowledge recovery from source code: knowledge
    representation, where concrete semantic network is
    separated from abstract semantic network to better
    accommodate uncertainty reasoning and propagation;
    uncertainty reasoning, which borrows ideas from
    confirmation theory and recasts them in the context of
    semantic network reasoning; heuristic search, which is
    designed on the principle of programming psychology. Our
    approach is light-weighted. It can be used stand-alone or
    as a complement to traditional heavy-weighted domain
    knowledge recovery methods.},
  keywords  = {program understanding, knowledge recovery, semantic
               network, belief revision, heuristic search, programming
               psychology},
  note      = {This paper describes our innovative work where
    psychology-based methodology was brought into the area of
    Artificial Intelligence and was applied in the field of
    domain knowledge recovery from source code.},
  class     = {Knowledge-Based_Concept_Assignment System_Modularization
    Model_Generating Reverse_Specification Metrics
    Reverse_Design Domain_Analysis
    Metric-Based_Methods_in_Reverse_Design
    Human_Oriented_Concept_Assignment_by_Informal_Reasoning
    Software_Reverse_Engineering}
}
Generating Linkage between Source Code and Evolvable Domain Knowledge for the Ease of Software Evolution, Yang Li and Hongji Yang and William Chu
Available as .
@InProceedings{ li.yang.ea:generating,
  author    = {Yang Li and Hongji Yang and William Chu},
  title     = {Generating Linkage between Source Code and Evolvable
               Domain Knowledge for the Ease of Software Evolution},
  booktitle = {Proceedings of IEEE International Symposium on Principles
               of Software Evolution (ISPSE2000)},
  publisher = {IEEE Computer Society Press},
  year      = {2000},
  address   = {Kanazawa, Japan},
  month     = nov,
  abstract  = {Business software systems unexceptably need to be evolved
    to cater for new/changed requirement coming from market or
    adapt to new operating environment. One of the most
    significant problems in current software evolution practice
    is that software maintainers usually find it quite
    difficult to locate the program sections in source code
    which need to be modified and to identify the extent to
    which the changes in these program sections could affect
    the rest of the software system. In this paper, we propose
    a knowledge engineering based approach to solving this
    problem. In particular, we match a software program with a
    pre-defined domain knowledge base in the representation of
    simplified semantic network we proposed in order to link
    the source program with its domain level interpretation.
    The domain knowledge base contains only important domain
    knowledge where potential evolutions could occur, which
    reduces the size of the knowledge base. Moreover, a domain
    oriented program partitioning method is also proposed to
    cut a program into self-contained modules with manageable
    size. In these ways, the computational complexity involved
    in generating the linkage is significantly reduced which
    makes this approach applicable. An example shows that
    software evolution can be easily carried out as the domain
    knowledge it links with evolves.},
  keywords  = {software evolution, knowledge engineering, program
               partitioning, evolvable domain knowledge, semantic
               network},
  note      = {This paper gives engineering-oriented considerations to
    link generation between domain knowledge and source code
    prior to successful software evolution.},
  class     = {Software_Evolution Knowledge-Based_Concept_Assignment
    Using_graphs Change_Impact
    Cognitive_Processes_in_Human_Program_Understanding Metrics
    Reverse_Design Re-Design System_Modularization
    Recovery_of_Software_Architecture
    Metric-Based_Methods_in_Reverse_Design Alteration
    Human_Oriented_Concept_Assignment_by_Informal_Reasoning
    Intermediate_Representations_of_Source_Code
    Software_Reverse_Engineering}
}
Towards Building a Smarter Domain Knowledge Recovery Assistant, Yang Li and Hongji Yang and William Chu
Available as .
@InProceedings{ li.yang.ea:towards,
  author    = {Yang Li and Hongji Yang and William Chu},
  title     = {Towards Building a Smarter Domain Knowledge Recovery
               Assistant},
  booktitle = {Proceedings of the 24th IEEE Annual Computer Software and
               Applications Conference (COMPSAC2000)},
  publisher = {IEEE Computer Society Press},
  year      = {2000},
  month     = oct,
  abstract  = {Legacy systems need to be ``salvaged'' to prolong their
    life circle. One way for such a salvation is to recover and
    maintain domain knowledge embedded in legacy code. It is
    our observation that existing methods or tools for domain
    knowledge recovery from source code did not provide
    maintainers with sufficient assistance to reduce the size
    of analysable program sections, identify program sections
    having intensive domain knowledge and maintain the belief
    of a network of domain knowledge extracted from source code
    which can accommodate change of belief coming from a user.
    In this paper, we introduce techniques which can provide
    software maintainers with smart assistance for the
    above-mentioned three issues.},
  keywords  = {program partitioning, program readability metric, belief
               network, domain knowledge recovery},
  note      = {We incorporate human psychology knowledge with the design of
    a domain knowledge recovery tool.},
  class     = {Automated_Reverse_Design
    Knowledge-Based_Concept_Assignment Reverse_Engineering_Tool
    Model_Generating Reverse_Specification
    Cognitive_Processes_in_Human_Program_Understanding Metrics
    Reverse_Design System_Modularization Domain_Analysis
    Recovery_of_Software_Architecture
    Metric-Based_Methods_in_Reverse_Design
    Human_Oriented_Concept_Assignment_by_Informal_Reasoning
    Software_Reverse_Engineering}
}
Fusing Ambiguous Domain Knowledge Slices in a Reverse Engineering Process, Yang Li and Hongji Yang
Available as .
@InProceedings{ li.yang:fusing,
  author    = {Yang Li and Hongji Yang},
  title     = {Fusing Ambiguous Domain Knowledge Slices in a Reverse
               Engineering Process},
  booktitle = {Proceedings of the 7th Asia-Pacific Software Engineering
               Conference (APSEC2000)},
  publisher = {IEEE Computer Society Press},
  year      = {2000},
  address   = {Singapore},
  month     = dec,
  abstract  = {Recovering domain knowledge from legacy code plays an
    important role in the new information technology era, which
    can be of help for program understanding, system evolution
    and software reuse. Traditional methods for domain
    knowledge recovery from source code did not sufficiently
    address the issue of ambiguity handling, in particular, the
    propagation of ambiguity among multiple domain knowledge
    slices recovered from source code in software reverse
    engineering process. In this paper, we present a novel
    approach to recovering unambiguous domain knowledge from
    legacy code, where isolated ambiguous domain knowledge
    slices are ``fused'' together in an iterative ambiguity
    propagation process and hence the disambiguity of these
    recovered knowledge slices is increased.},
  keywords  = {reverse engineering, domain knowledge recovery,
               co-operative behaviour, belief revision},
  note      = {This is the first of this kind of work which deals with
    the ambiguity involved in recovering large-scale domain
    knowledge from source code.},
  class     = {Automated_Reverse_Design
    Knowledge-Based_Concept_Assignment Using_graphs
    Model_Generating Reverse_Specification
    Cognitive_Processes_in_Human_Program_Understanding
    Reverse_Design Domain_Analysis
    Recovery_of_Software_Architecture
    Metric-Based_Methods_in_Reverse_Design
    Human_Oriented_Concept_Assignment_by_Informal_Reasoning
    Intermediate_Representations_of_Source_Code
    Software_Reverse_Engineering}
}
Reuse of Modular Software with Automated Comment Analysis, Stan Matwin and Affa Ahmad
@InProceedings{ matwin.ahmad:reuse,
  author    = {Stan Matwin and Affa Ahmad},
  title     = {Reuse of Modular Software with Automated Comment
               Analysis},
  booktitle = {Proceedings of the International Conference on Software
               Maintenance 1994},
  year      = {1994},
  pages     = {222--231},
  publisher = {IEEE Computer Society Press},
  month     = sep,
  abstract  = {The paper presents an approach to software reuse based on
    automatic analysis of program comments. First, domain terms
    are extracted from the comments in a semi-automatic
    procedure. Those terms are then used in an off-the-shelf
    Case-based Reasoning system as indices for software
    modules. Noun phrases extracted from comments in LINPACK
    (widely distributed linear algebra package) form the basis
    of simple domain models for linear systems. The process of
    constructing a reuse system is broken into three steps. A
    file containing comments from all LINPACK routines is
    processed to yield a list of technical phrases. The second
    step involves building domain models based on an analysis
    of these technical phrases and then indexing cases
    according to these models. Finally, tools provided by the
    REMIND Case Based Reasoning (CBR) shell are used to create
    a case library incorporating this domain knowledge. Early
    experiments described in the paper show that noun phrases
    automatically extracted from the comments can provide
    useful functional description of the routines. The
    resulting simple domain models are usually sufficient for
    software reuse application. Finally, we found standard CBR
    technology to be a viable means of constructing
    compositional software reuse libraries.},
  class     = {Software_Reverse_Engineering, Reverse_Design,
               Knowledge-Based_Concept_Assignment,
               Human_Oriented_Concept_Assignment_by_Informal_Reasoning}
}